summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_common.c2
-rw-r--r--net/9p/trans_fd.c4
-rw-r--r--net/9p/trans_rdma.c4
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/9p/trans_xen.c2
-rw-r--r--net/atm/lec.c9
-rw-r--r--net/bridge/br_if.c4
-rw-r--r--net/bridge/netfilter/ebtables.c11
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/ceph/messenger.c7
-rw-r--r--net/ceph/mon_client.c14
-rw-r--r--net/compat.c6
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/ethtool.c5
-rw-r--r--net/core/filter.c1
-rw-r--r--net/dccp/ccids/ccid2.c14
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h4
-rw-r--r--net/ieee802154/6lowpan/reassembly.c14
-rw-r--r--net/ife/ife.c38
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c2
-rw-r--r--net/ipv4/route.c119
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_bbr.c4
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv6/Kconfig9
-rw-r--r--net/ipv6/ip6_vti.c4
-rw-r--r--net/ipv6/netfilter/Kconfig55
-rw-r--r--net/ipv6/route.c9
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/xfrm6_tunnel.c3
-rw-r--r--net/key/af_key.c45
-rw-r--r--net/l2tp/l2tp_debugfs.c5
-rw-r--r--net/l2tp/l2tp_ppp.c12
-rw-r--r--net/llc/af_llc.c17
-rw-r--r--net/llc/llc_c_ac.c9
-rw-r--r--net/llc/llc_conn.c22
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c155
-rw-r--r--net/netfilter/nf_conntrack_expect.c5
-rw-r--r--net/netfilter/nf_conntrack_extend.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c16
-rw-r--r--net/netfilter/nf_tables_api.c69
-rw-r--r--net/netfilter/xt_connmark.c49
-rw-r--r--net/netlink/af_netlink.c6
-rw-r--r--net/nsh/nsh.c4
-rw-r--r--net/openvswitch/flow_netlink.c9
-rw-r--r--net/packet/af_packet.c60
-rw-r--r--net/packet/internal.h10
-rw-r--r--net/rds/ib_cm.c3
-rw-r--r--net/rds/recv.c1
-rw-r--r--net/sched/act_ife.c9
-rw-r--r--net/sched/sch_fq.c37
-rw-r--r--net/sctp/associola.c30
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/ipv6.c3
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_statefuns.c94
-rw-r--r--net/sctp/stream.c2
-rw-r--r--net/smc/af_smc.c71
-rw-r--r--net/smc/smc_core.c22
-rw-r--r--net/smc/smc_core.h3
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/clnt.c8
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--net/sunrpc/sched.c10
-rw-r--r--net/sunrpc/stats.c16
-rw-r--r--net/sunrpc/sunrpc.h6
-rw-r--r--net/sunrpc/xdr.c82
-rw-r--r--net/sunrpc/xprt.c34
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c7
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c13
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c53
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c32
-rw-r--r--net/sunrpc/xprtrdma/transport.c43
-rw-r--r--net/sunrpc/xprtrdma/verbs.c44
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h4
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/tipc/node.c17
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tls/tls_main.c19
-rw-r--r--net/xfrm/xfrm_state.c6
86 files changed, 992 insertions, 579 deletions
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 38aa6345bdfa..b718db2085b2 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -16,7 +16,7 @@
#include <linux/module.h>
/**
- * p9_release_req_pages - Release pages after the transaction.
+ * p9_release_pages - Release pages after the transaction.
*/
void p9_release_pages(struct page **pages, int nr_pages)
{
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 0cfba919d167..848969fe7979 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1092,8 +1092,8 @@ static struct p9_trans_module p9_fd_trans = {
};
/**
- * p9_poll_proc - poll worker thread
- * @a: thread state and arguments
+ * p9_poll_workfn - poll worker thread
+ * @work: work queue
*
* polls all v9fs transports for new events and queues the appropriate
* work to the work queue
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 6d8e3031978f..3d414acb7015 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -68,8 +68,6 @@
* @pd: Protection Domain pointer
* @qp: Queue Pair pointer
* @cq: Completion Queue pointer
- * @dm_mr: DMA Memory Region pointer
- * @lkey: The local access only memory region key
* @timeout: Number of uSecs to wait for connection management events
* @privport: Whether a privileged port may be used
* @port: The port to use
@@ -632,7 +630,7 @@ static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
}
/**
- * trans_create_rdma - Transport method for creating atransport instance
+ * rdma_create_trans - Transport method for creating a transport instance
* @client: client instance
* @addr: IP address string
* @args: Mount options string
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3aa5a93ad107..4d0372263e5d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -60,7 +60,6 @@ static atomic_t vp_pinned = ATOMIC_INIT(0);
/**
* struct virtio_chan - per-instance transport information
- * @initialized: whether the channel is initialized
* @inuse: whether the channel is in use
* @lock: protects multiple elements within this structure
* @client: client instance
@@ -385,8 +384,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
* @uidata: user bffer that should be ued for zero copy read
* @uodata: user buffer that shoud be user for zero copy write
* @inlen: read buffer size
- * @olen: write buffer size
- * @hdrlen: reader header size, This is the size of response protocol data
+ * @outlen: write buffer size
+ * @in_hdr_len: reader header size, This is the size of response protocol data
*
*/
static int
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 086a4abdfa7c..0f19960390a6 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -485,7 +485,7 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
static int xen_9pfs_front_resume(struct xenbus_device *dev)
{
- dev_warn(&dev->dev, "suspsend/resume unsupported\n");
+ dev_warn(&dev->dev, "suspend/resume unsupported\n");
return 0;
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 01d5d20a6eb1..3138a869b5c0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -41,6 +41,9 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
#include <linux/module.h>
#include <linux/init.h>
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
#include "lec.h"
#include "lec_arpc.h"
#include "resources.h"
@@ -687,8 +690,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
if (bytes_left != 0)
pr_info("copy from user failed for %d bytes\n", bytes_left);
- if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF ||
- !dev_lec[ioc_data.dev_num])
+ if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF)
+ return -EINVAL;
+ ioc_data.dev_num = array_index_nospec(ioc_data.dev_num, MAX_LEC_ITF);
+ if (!dev_lec[ioc_data.dev_num])
return -EINVAL;
vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL);
if (!vpriv)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 82c1a6f430b3..5bb6681fa91e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -518,8 +518,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return -ELOOP;
}
- /* Device is already being bridged */
- if (br_port_exists(dev))
+ /* Device has master upper dev */
+ if (netdev_master_upper_dev_get(dev))
return -EBUSY;
/* No bridging devices that dislike that (e.g. wireless) */
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 032e0fe45940..28a4c3490359 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1825,13 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info,
{
unsigned int size = info->entries_size;
const void *entries = info->entries;
- int ret;
newinfo->entries_size = size;
-
- ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
- if (ret)
- return ret;
+ if (info->nentries) {
+ int ret = xt_compat_init_offsets(NFPROTO_BRIDGE,
+ info->nentries);
+ if (ret)
+ return ret;
+ }
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 53ecda10b790..13e2ae6be620 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -174,7 +174,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
- "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND");
+ "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND");
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index fcb40c12b1f8..3b3d33ea9ed8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2569,6 +2569,11 @@ static int try_write(struct ceph_connection *con)
int ret = 1;
dout("try_write start %p state %lu\n", con, con->state);
+ if (con->state != CON_STATE_PREOPEN &&
+ con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN)
+ return 0;
more:
dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
@@ -2594,6 +2599,8 @@ more:
}
more_kvec:
+ BUG_ON(!con->sock);
+
/* kvec data queued? */
if (con->out_kvec_left) {
ret = write_partial_kvec(con);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index b3dac24412d3..21ac6e3b96bb 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -209,6 +209,14 @@ static void reopen_session(struct ceph_mon_client *monc)
__open_session(monc);
}
+static void un_backoff(struct ceph_mon_client *monc)
+{
+ monc->hunt_mult /= 2; /* reduce by 50% */
+ if (monc->hunt_mult < 1)
+ monc->hunt_mult = 1;
+ dout("%s hunt_mult now %d\n", __func__, monc->hunt_mult);
+}
+
/*
* Reschedule delayed work timer.
*/
@@ -963,6 +971,7 @@ static void delayed_work(struct work_struct *work)
if (!monc->hunting) {
ceph_con_keepalive(&monc->con);
__validate_auth(monc);
+ un_backoff(monc);
}
if (is_auth &&
@@ -1123,9 +1132,8 @@ static void finish_hunting(struct ceph_mon_client *monc)
dout("%s found mon%d\n", __func__, monc->cur_mon);
monc->hunting = false;
monc->had_a_connection = true;
- monc->hunt_mult /= 2; /* reduce by 50% */
- if (monc->hunt_mult < 1)
- monc->hunt_mult = 1;
+ un_backoff(monc);
+ __schedule_delayed(monc);
}
}
diff --git a/net/compat.c b/net/compat.c
index 5ae7437d3853..7242cce5631b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -377,7 +377,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
optname == SO_ATTACH_REUSEPORT_CBPF)
return do_set_attach_filter(sock, level, optname,
optval, optlen);
- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+ if (!COMPAT_USE_64BIT_TIME &&
+ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
return do_set_sock_timeout(sock, level, optname, optval, optlen);
return sock_setsockopt(sock, level, optname, optval, optlen);
@@ -448,7 +449,8 @@ static int do_get_sock_timeout(struct socket *sock, int level, int optname,
static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+ if (!COMPAT_USE_64BIT_TIME &&
+ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
return do_get_sock_timeout(sock, level, optname, optval, optlen);
return sock_getsockopt(sock, level, optname, optval, optlen);
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index e3e6a3e2ca22..d884d8f5f0e5 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -839,7 +839,7 @@ void dev_mc_flush(struct net_device *dev)
EXPORT_SYMBOL(dev_mc_flush);
/**
- * dev_mc_flush - Init multicast address list
+ * dev_mc_init - Init multicast address list
* @dev: device
*
* Init multicast address list.
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 03416e6dd5d7..ba02f0dfe85c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1032,6 +1032,11 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
info_size = sizeof(info);
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* Since malicious users may modify the original data,
+ * we need to check whether FLOW_RSS is still requested.
+ */
+ if (!(info.flow_type & FLOW_RSS))
+ return -EINVAL;
}
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index d31aff93270d..e77c30ca491d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3240,6 +3240,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
skb_dst_set(skb, (struct dst_entry *) md);
info = &md->u.tun_info;
+ memset(info, 0, sizeof(*info));
info->mode = IP_TUNNEL_INFO_TX;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 92d016e87816..385f153fe031 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,6 +126,16 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
DCCPF_SEQ_WMAX));
}
+static void dccp_tasklet_schedule(struct sock *sk)
+{
+ struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet;
+
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ sock_hold(sk);
+ __tasklet_schedule(t);
+ }
+}
+
static void ccid2_hc_tx_rto_expire(struct timer_list *t)
{
struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
@@ -166,7 +176,7 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
/* if we were blocked before, we may now send cwnd=1 packet */
if (sender_was_blocked)
- tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ dccp_tasklet_schedule(sk);
/* restart backed-off timer */
sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
out:
@@ -706,7 +716,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
done:
/* check if incoming Acks allow pending packets to be sent */
if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
- tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ dccp_tasklet_schedule(sk);
dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index b50a8732ff43..1501a20a94ca 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -232,6 +232,7 @@ static void dccp_write_xmitlet(unsigned long data)
else
dccp_write_xmit(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void dccp_write_xmit_timer(struct timer_list *t)
@@ -240,7 +241,6 @@ static void dccp_write_xmit_timer(struct timer_list *t)
struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
dccp_write_xmitlet((unsigned long)sk);
- sock_put(sk);
}
void dccp_init_xmit_timers(struct sock *sk)
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index b8d95cb71c25..44a7e16bf3b5 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -20,8 +20,8 @@ typedef unsigned __bitwise lowpan_rx_result;
struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr src;
- const struct ieee802154_addr dst;
+ struct ieee802154_addr src;
+ struct ieee802154_addr dst;
};
/* Equivalent of ipv4 struct ipq
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 1790b65944b3..2cc224106b69 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -75,14 +75,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- struct frag_lowpan_compare_key key = {
- .tag = cb->d_tag,
- .d_size = cb->d_size,
- .src = *src,
- .dst = *dst,
- };
+ struct frag_lowpan_compare_key key = {};
struct inet_frag_queue *q;
+ key.tag = cb->d_tag;
+ key.d_size = cb->d_size;
+ key.src = *src;
+ key.dst = *dst;
+
q = inet_frag_find(&ieee802154_lowpan->frags, &key);
if (!q)
return NULL;
@@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
struct lowpan_frag_queue *fq;
struct net *net = dev_net(skb->dev);
struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
- struct ieee802154_hdr hdr;
+ struct ieee802154_hdr hdr = {};
int err;
if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
diff --git a/net/ife/ife.c b/net/ife/ife.c
index 7d1ec76e7f43..13bbf8cb6a39 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen)
int total_pull;
u16 ifehdrln;
+ if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN))
+ return NULL;
+
ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len);
ifehdrln = ntohs(ifehdr->metalen);
total_pull = skb->dev->hard_header_len + ifehdrln;
@@ -92,12 +95,43 @@ struct meta_tlvhdr {
__be16 len;
};
+static bool __ife_tlv_meta_valid(const unsigned char *skbdata,
+ const unsigned char *ifehdr_end)
+{
+ const struct meta_tlvhdr *tlv;
+ u16 tlvlen;
+
+ if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end))
+ return false;
+
+ tlv = (const struct meta_tlvhdr *)skbdata;
+ tlvlen = ntohs(tlv->len);
+
+ /* tlv length field is inc header, check on minimum */
+ if (tlvlen < NLA_HDRLEN)
+ return false;
+
+ /* overflow by NLA_ALIGN check */
+ if (NLA_ALIGN(tlvlen) < tlvlen)
+ return false;
+
+ if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end))
+ return false;
+
+ return true;
+}
+
/* Caller takes care of presenting data in network order
*/
-void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen)
+void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype,
+ u16 *dlen, u16 *totlen)
{
- struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata;
+ struct meta_tlvhdr *tlv;
+
+ if (!__ife_tlv_meta_valid(skbdata, ifehdr_end))
+ return NULL;
+ tlv = (struct meta_tlvhdr *)skbdata;
*dlen = ntohs(tlv->len) - NLA_HDRLEN;
*attrtype = ntohs(tlv->type);
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7523ddb2566b..0e5edd0c7926 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -28,10 +28,9 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
-nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o
-$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic-asn1.h
+nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
+$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h
obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index b6e277093e7e..ac110c1d55b5 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -54,7 +54,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <linux/netfilter/nf_conntrack_snmp.h>
-#include "nf_nat_snmp_basic-asn1.h"
+#include "nf_nat_snmp_basic.asn1.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ccb25d80f679..29268efad247 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
fnhe->fnhe_mtu_locked = lock;
- fnhe->fnhe_expires = expires;
+ fnhe->fnhe_expires = max(1UL, expires);
/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
@@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+ struct fnhe_hash_bucket *hash;
+ struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+ u32 hval = fnhe_hashfun(daddr);
+
+ spin_lock_bh(&fnhe_lock);
+
+ hash = rcu_dereference_protected(nh->nh_exceptions,
+ lockdep_is_held(&fnhe_lock));
+ hash += hval;
+
+ fnhe_p = &hash->chain;
+ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+ while (fnhe) {
+ if (fnhe->fnhe_daddr == daddr) {
+ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+ fnhe_flush_routes(fnhe);
+ kfree_rcu(fnhe, rcu);
+ break;
+ }
+ fnhe_p = &fnhe->fnhe_next;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+ lockdep_is_held(&fnhe_lock));
+ }
+
+ spin_unlock_bh(&fnhe_lock);
+}
+
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (fnhe->fnhe_daddr == daddr)
+ if (fnhe->fnhe_daddr == daddr) {
+ if (fnhe->fnhe_expires &&
+ time_after(jiffies, fnhe->fnhe_expires)) {
+ ip_del_fnhe(nh, daddr);
+ break;
+ }
return fnhe;
+ }
}
return NULL;
}
@@ -1339,6 +1375,7 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
fnhe->fnhe_gw = 0;
fnhe->fnhe_pmtu = 0;
fnhe->fnhe_expires = 0;
+ fnhe->fnhe_mtu_locked = false;
fnhe_flush_routes(fnhe);
orig = NULL;
}
@@ -1636,36 +1673,6 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
-{
- struct fnhe_hash_bucket *hash;
- struct fib_nh_exception *fnhe, __rcu **fnhe_p;
- u32 hval = fnhe_hashfun(daddr);
-
- spin_lock_bh(&fnhe_lock);
-
- hash = rcu_dereference_protected(nh->nh_exceptions,
- lockdep_is_held(&fnhe_lock));
- hash += hval;
-
- fnhe_p = &hash->chain;
- fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
- while (fnhe) {
- if (fnhe->fnhe_daddr == daddr) {
- rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
- fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
- fnhe_flush_routes(fnhe);
- kfree_rcu(fnhe, rcu);
- break;
- }
- fnhe_p = &fnhe->fnhe_next;
- fnhe = rcu_dereference_protected(fnhe->fnhe_next,
- lockdep_is_held(&fnhe_lock));
- }
-
- spin_unlock_bh(&fnhe_lock);
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1719,20 +1726,10 @@ static int __mkroute_input(struct sk_buff *skb,
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
if (do_cache) {
- if (fnhe) {
+ if (fnhe)
rth = rcu_dereference(fnhe->fnhe_rth_input);
- if (rth && rth->dst.expires &&
- time_after(jiffies, rth->dst.expires)) {
- ip_del_fnhe(&FIB_RES_NH(*res), daddr);
- fnhe = NULL;
- } else {
- goto rt_cache;
- }
- }
-
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
-
-rt_cache:
+ else
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -2216,39 +2213,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
* the loopback interface and the IP_PKTINFO ipi_ifindex will
* be set to the loopback interface as well.
*/
- fi = NULL;
+ do_cache = false;
}
fnhe = NULL;
do_cache &= fi != NULL;
- if (do_cache) {
+ if (fi) {
struct rtable __rcu **prth;
struct fib_nh *nh = &FIB_RES_NH(*res);
fnhe = find_exception(nh, fl4->daddr);
+ if (!do_cache)
+ goto add;
if (fnhe) {
prth = &fnhe->fnhe_rth_output;
- rth = rcu_dereference(*prth);
- if (rth && rth->dst.expires &&
- time_after(jiffies, rth->dst.expires)) {
- ip_del_fnhe(nh, fl4->daddr);
- fnhe = NULL;
- } else {
- goto rt_cache;
+ } else {
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
}
+ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
}
-
- if (unlikely(fl4->flowi4_flags &
- FLOWI_FLAG_KNOWN_NH &&
- !(nh->nh_gw &&
- nh->nh_scope == RT_SCOPE_LINK))) {
- do_cache = false;
- goto add;
- }
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
rth = rcu_dereference(*prth);
-
-rt_cache:
if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
return rth;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9ce1c726185e..c9d00ef54dec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -697,7 +697,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
{
return skb->len < size_goal &&
sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
- skb != tcp_write_queue_head(sk) &&
+ !tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
@@ -1204,7 +1204,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
uarg->zerocopy = 0;
}
- if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
+ if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+ !tp->repair) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2673,7 +2674,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_REPAIR_QUEUE:
if (!tp->repair)
err = -EPERM;
- else if (val < TCP_QUEUES_NR)
+ else if ((unsigned int)val < TCP_QUEUES_NR)
tp->repair_queue = val;
else
err = -EINVAL;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 158d105e76da..58e2f479ffb4 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -806,7 +806,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
}
}
}
- bbr->idle_restart = 0;
+ /* Restart after idle ends only once we process a new S/ACK for data */
+ if (rs->delivered > 0)
+ bbr->idle_restart = 0;
}
static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 367def6ddeda..e51c644484dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3868,11 +3868,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
int length = (th->doff << 2) - sizeof(*th);
const u8 *ptr = (const u8 *)(th + 1);
- /* If the TCP option is too short, we can short cut */
- if (length < TCPOLEN_MD5SIG)
- return NULL;
-
- while (length > 0) {
+ /* If not enough data remaining, we can short cut */
+ while (length >= TCPOLEN_MD5SIG) {
int opcode = *ptr++;
int opsize;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24b5c59b1c53..c2a292dfd137 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -401,9 +401,9 @@ static int compute_score(struct sock *sk, struct net *net,
bool dev_match = (sk->sk_bound_dev_if == dif ||
sk->sk_bound_dev_if == sdif);
- if (exact_dif && !dev_match)
+ if (!dev_match)
return -1;
- if (sk->sk_bound_dev_if && dev_match)
+ if (sk->sk_bound_dev_if)
score += 4;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 6794ddf0547c..11e4e80cf7e9 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -34,16 +34,15 @@ config IPV6_ROUTE_INFO
bool "IPv6: Route Information (RFC 4191) support"
depends on IPV6_ROUTER_PREF
---help---
- This is experimental support of Route Information.
+ Support of Route Information.
If unsure, say N.
config IPV6_OPTIMISTIC_DAD
bool "IPv6: Enable RFC 4429 Optimistic DAD"
---help---
- This is experimental support for optimistic Duplicate
- Address Detection. It allows for autoconfigured addresses
- to be used more quickly.
+ Support for optimistic Duplicate Address Detection. It allows for
+ autoconfigured addresses to be used more quickly.
If unsure, say N.
@@ -280,7 +279,7 @@ config IPV6_MROUTE
depends on IPV6
select IP_MROUTE_COMMON
---help---
- Experimental support for IPv6 multicast forwarding.
+ Support for IPv6 multicast forwarding.
If unsure, say N.
config IPV6_MROUTE_MULTIPLE_TABLES
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c214ffec02f0..ca957dd93a29 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -669,7 +669,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
else
mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr);
- dev->mtu = max_t(int, mtu, IPV6_MIN_MTU);
+ dev->mtu = max_t(int, mtu, IPV4_MIN_MTU);
}
/**
@@ -881,7 +881,7 @@ static void vti6_dev_setup(struct net_device *dev)
dev->priv_destructor = vti6_dev_free;
dev->type = ARPHRD_TUNNEL6;
- dev->min_mtu = IPV6_MIN_MTU;
+ dev->min_mtu = IPV4_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ccbfa83e4bb0..ce77bcc2490c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -48,6 +48,34 @@ config NFT_CHAIN_ROUTE_IPV6
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
+if NF_NAT_IPV6
+
+config NFT_CHAIN_NAT_IPV6
+ tristate "IPv6 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv6 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NFT_MASQ_IPV6
+ tristate "IPv6 masquerade support for nf_tables"
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV6
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
+config NFT_REDIR_IPV6
+ tristate "IPv6 redirect support for nf_tables"
+ depends on NFT_REDIR
+ select NF_NAT_REDIRECT
+ help
+ This is the expression that provides IPv4 redirect support for
+ nf_tables.
+
+endif # NF_NAT_IPV6
+
config NFT_REJECT_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
@@ -107,39 +135,12 @@ config NF_NAT_IPV6
if NF_NAT_IPV6
-config NFT_CHAIN_NAT_IPV6
- depends on NF_TABLES_IPV6
- tristate "IPv6 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv6 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
config NF_NAT_MASQUERADE_IPV6
tristate "IPv6 masquerade support"
help
This is the kernel functionality to provide NAT in the masquerade
flavour (automatic source address selection) for IPv6.
-config NFT_MASQ_IPV6
- tristate "IPv6 masquerade support for nf_tables"
- depends on NF_TABLES_IPV6
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV6
- help
- This is the expression that provides IPv4 masquerading support for
- nf_tables.
-
-config NFT_REDIR_IPV6
- tristate "IPv6 redirect support for nf_tables"
- depends on NF_TABLES_IPV6
- depends on NFT_REDIR
- select NF_NAT_REDIRECT
- help
- This is the expression that provides IPv4 redirect support for
- nf_tables.
-
endif # NF_NAT_IPV6
config IP6_NF_IPTABLES
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49b954d6d0fa..f4d61736c41a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1835,11 +1835,16 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
+ struct icmp6hdr _icmph;
if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
goto out;
- icmph = icmp6_hdr(skb);
+ icmph = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_icmph), &_icmph);
+ if (!icmph)
+ goto out;
+
if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
@@ -3975,6 +3980,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
+ [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
@@ -3986,6 +3992,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_EXPIRES] = { .type = NLA_U32 },
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
+ [RTA_TABLE] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index f343e6f0fc95..5fe139484919 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -136,7 +136,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4ec76a87aeb8..ea0730028e5d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -148,9 +148,9 @@ static int compute_score(struct sock *sk, struct net *net,
bool dev_match = (sk->sk_bound_dev_if == dif ||
sk->sk_bound_dev_if == sdif);
- if (exact_dif && !dev_match)
+ if (!dev_match)
return -1;
- if (sk->sk_bound_dev_if && dev_match)
+ if (sk->sk_bound_dev_if)
score++;
}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f85f0d7480ac..4a46df8441c9 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -341,6 +341,9 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
+ xfrm_flush_gc();
+
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7e2e7188e7f4..e62e52e8f141 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -437,6 +437,24 @@ static int verify_address_len(const void *p)
return 0;
}
+static inline int sadb_key_len(const struct sadb_key *key)
+{
+ int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8);
+
+ return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes,
+ sizeof(uint64_t));
+}
+
+static int verify_key_len(const void *p)
+{
+ const struct sadb_key *key = p;
+
+ if (sadb_key_len(key) > key->sadb_key_len)
+ return -EINVAL;
+
+ return 0;
+}
+
static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
{
return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
@@ -533,16 +551,25 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
return -EINVAL;
if (ext_hdrs[ext_type-1] != NULL)
return -EINVAL;
- if (ext_type == SADB_EXT_ADDRESS_SRC ||
- ext_type == SADB_EXT_ADDRESS_DST ||
- ext_type == SADB_EXT_ADDRESS_PROXY ||
- ext_type == SADB_X_EXT_NAT_T_OA) {
+ switch (ext_type) {
+ case SADB_EXT_ADDRESS_SRC:
+ case SADB_EXT_ADDRESS_DST:
+ case SADB_EXT_ADDRESS_PROXY:
+ case SADB_X_EXT_NAT_T_OA:
if (verify_address_len(p))
return -EINVAL;
- }
- if (ext_type == SADB_X_EXT_SEC_CTX) {
+ break;
+ case SADB_X_EXT_SEC_CTX:
if (verify_sec_ctx_len(p))
return -EINVAL;
+ break;
+ case SADB_EXT_KEY_AUTH:
+ case SADB_EXT_KEY_ENCRYPT:
+ if (verify_key_len(p))
+ return -EINVAL;
+ break;
+ default:
+ break;
}
ext_hdrs[ext_type-1] = (void *) p;
}
@@ -1104,14 +1131,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
if (key != NULL &&
sa->sadb_sa_auth != SADB_X_AALG_NULL &&
- ((key->sadb_key_bits+7) / 8 == 0 ||
- (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+ key->sadb_key_bits == 0)
return ERR_PTR(-EINVAL);
key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
if (key != NULL &&
sa->sadb_sa_encrypt != SADB_EALG_NULL &&
- ((key->sadb_key_bits+7) / 8 == 0 ||
- (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+ key->sadb_key_bits == 0)
return ERR_PTR(-EINVAL);
x = xfrm_state_alloc(net);
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index b8f9d45bfeb1..7f1e842ef05a 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -106,8 +106,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
return;
/* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */
- if (pd->tunnel)
+ if (pd->tunnel) {
l2tp_tunnel_dec_refcount(pd->tunnel);
+ pd->tunnel = NULL;
+ pd->session = NULL;
+ }
}
static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 7d0c963680e6..1fd9e145076a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -619,6 +619,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
lock_sock(sk);
error = -EINVAL;
+
+ if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) &&
+ sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) &&
+ sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) &&
+ sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6))
+ goto end;
+
if (sp->sa_protocol != PX_PROTO_OL2TP)
goto end;
@@ -1618,8 +1625,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
return;
/* Drop reference taken by last invocation of pppol2tp_next_tunnel() */
- if (pd->tunnel)
+ if (pd->tunnel) {
l2tp_tunnel_dec_refcount(pd->tunnel);
+ pd->tunnel = NULL;
+ pd->session = NULL;
+ }
}
static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 01dcc0823d1f..1beeea9549fa 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -199,9 +199,19 @@ static int llc_ui_release(struct socket *sock)
llc->laddr.lsap, llc->daddr.lsap);
if (!llc_send_disc(sk))
llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
- if (!sock_flag(sk, SOCK_ZAPPED))
+ if (!sock_flag(sk, SOCK_ZAPPED)) {
+ struct llc_sap *sap = llc->sap;
+
+ /* Hold this for release_sock(), so that llc_backlog_rcv()
+ * could still use it.
+ */
+ llc_sap_hold(sap);
llc_sap_remove_socket(llc->sap, sk);
- release_sock(sk);
+ release_sock(sk);
+ llc_sap_put(sap);
+ } else {
+ release_sock(sk);
+ }
if (llc->dev)
dev_put(llc->dev);
sock_put(sk);
@@ -920,6 +930,9 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (size > llc->dev->mtu)
size = llc->dev->mtu;
copied = size - hdrlen;
+ rc = -EINVAL;
+ if (copied < 0)
+ goto release;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 163121192aca..4d78375f9872 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb)
int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb)
{
- struct llc_sock *llc = llc_sk(sk);
-
- del_timer(&llc->pf_cycle_timer.timer);
- del_timer(&llc->ack_timer.timer);
- del_timer(&llc->rej_sent_timer.timer);
- del_timer(&llc->busy_state_timer.timer);
- llc->ack_must_be_send = 0;
- llc->ack_pf = 0;
+ llc_sk_stop_all_timers(sk, false);
return 0;
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 110e32bcb399..c0ac522b48a1 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -961,6 +961,26 @@ out:
return sk;
}
+void llc_sk_stop_all_timers(struct sock *sk, bool sync)
+{
+ struct llc_sock *llc = llc_sk(sk);
+
+ if (sync) {
+ del_timer_sync(&llc->pf_cycle_timer.timer);
+ del_timer_sync(&llc->ack_timer.timer);
+ del_timer_sync(&llc->rej_sent_timer.timer);
+ del_timer_sync(&llc->busy_state_timer.timer);
+ } else {
+ del_timer(&llc->pf_cycle_timer.timer);
+ del_timer(&llc->ack_timer.timer);
+ del_timer(&llc->rej_sent_timer.timer);
+ del_timer(&llc->busy_state_timer.timer);
+ }
+
+ llc->ack_must_be_send = 0;
+ llc->ack_pf = 0;
+}
+
/**
* llc_sk_free - Frees a LLC socket
* @sk - socket to free
@@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk)
llc->state = LLC_CONN_OUT_OF_SVC;
/* Stop all (possibly) running timers */
- llc_conn_ac_stop_all_timers(sk, NULL);
+ llc_sk_stop_all_timers(sk, true);
#ifdef DEBUG_LLC_CONN_ALLOC
printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__,
skb_queue_len(&llc->pdu_unack_q),
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 704b3832dbad..44d8a55e9721 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -594,6 +594,7 @@ config NFT_QUOTA
config NFT_REJECT
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
+ depends on !NF_TABLES_INET || (IPV6!=m || m)
help
This option adds the "reject" expression that you can use to
explicitly deny and notify via TCP reset/ICMP informational errors
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5ebde4b15810..f36098887ad0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2384,11 +2384,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
sizeof(cfg.mcast_ifn));
cfg.syncid = dm->syncid;
- rtnl_lock();
- mutex_lock(&ipvs->sync_mutex);
ret = start_sync_thread(ipvs, &cfg, dm->state);
- mutex_unlock(&ipvs->sync_mutex);
- rtnl_unlock();
} else {
mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(ipvs, dm->state);
@@ -3481,12 +3477,8 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
if (ipvs->mixed_address_family_dests > 0)
return -EINVAL;
- rtnl_lock();
- mutex_lock(&ipvs->sync_mutex);
ret = start_sync_thread(ipvs, &c,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
- mutex_unlock(&ipvs->sync_mutex);
- rtnl_unlock();
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index fbaf3bd05b2e..001501e25625 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -49,6 +49,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
@@ -1360,15 +1361,9 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
/*
* Specifiy default interface for outgoing multicasts
*/
-static int set_mcast_if(struct sock *sk, char *ifname)
+static int set_mcast_if(struct sock *sk, struct net_device *dev)
{
- struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
-
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1396,19 +1391,14 @@ static int set_mcast_if(struct sock *sk, char *ifname)
* in the in_addr structure passed in as a parameter.
*/
static int
-join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev)
{
- struct net *net = sock_net(sk);
struct ip_mreqn mreq;
- struct net_device *dev;
int ret;
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1423,15 +1413,10 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
#ifdef CONFIG_IP_VS_IPV6
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
- char *ifname)
+ struct net_device *dev)
{
- struct net *net = sock_net(sk);
- struct net_device *dev;
int ret;
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1443,24 +1428,18 @@ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
}
#endif
-static int bind_mcastif_addr(struct socket *sock, char *ifname)
+static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
{
- struct net *net = sock_net(sock->sk);
- struct net_device *dev;
__be32 addr;
struct sockaddr_in sin;
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
-
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
if (!addr)
pr_err("You probably need to specify IP address on "
"multicast interface.\n");
IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
- ifname, &addr);
+ dev->name, &addr);
/* Now bind the socket with the address of multicast interface */
sin.sin_family = AF_INET;
@@ -1493,7 +1472,8 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
/*
* Set up sending multicast socket over UDP
*/
-static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
+static int make_send_sock(struct netns_ipvs *ipvs, int id,
+ struct net_device *dev, struct socket **sock_ret)
{
/* multicast addr */
union ipvs_sockaddr mcast_addr;
@@ -1505,9 +1485,10 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
- return ERR_PTR(result);
+ goto error;
}
- result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
+ *sock_ret = sock;
+ result = set_mcast_if(sock->sk, dev);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
@@ -1522,7 +1503,7 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
set_sock_size(sock->sk, 1, result);
if (AF_INET == ipvs->mcfg.mcast_af)
- result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
+ result = bind_mcastif_addr(sock, dev);
else
result = 0;
if (result < 0) {
@@ -1538,19 +1519,18 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
goto error;
}
- return sock;
+ return 0;
error:
- sock_release(sock);
- return ERR_PTR(result);
+ return result;
}
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
- int ifindex)
+static int make_receive_sock(struct netns_ipvs *ipvs, int id,
+ struct net_device *dev, struct socket **sock_ret)
{
/* multicast addr */
union ipvs_sockaddr mcast_addr;
@@ -1562,8 +1542,9 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
- return ERR_PTR(result);
+ goto error;
}
+ *sock_ret = sock;
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = SK_CAN_REUSE;
result = sysctl_sync_sock_size(ipvs);
@@ -1571,7 +1552,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
set_sock_size(sock->sk, 0, result);
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
- sock->sk->sk_bound_dev_if = ifindex;
+ sock->sk->sk_bound_dev_if = dev->ifindex;
result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
@@ -1582,21 +1563,20 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
#ifdef CONFIG_IP_VS_IPV6
if (ipvs->bcfg.mcast_af == AF_INET6)
result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
- ipvs->bcfg.mcast_ifn);
+ dev);
else
#endif
result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
- ipvs->bcfg.mcast_ifn);
+ dev);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
}
- return sock;
+ return 0;
error:
- sock_release(sock);
- return ERR_PTR(result);
+ return result;
}
@@ -1778,13 +1758,12 @@ static int sync_thread_backup(void *data)
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
int state)
{
- struct ip_vs_sync_thread_data *tinfo;
+ struct ip_vs_sync_thread_data *tinfo = NULL;
struct task_struct **array = NULL, *task;
- struct socket *sock;
struct net_device *dev;
char *name;
int (*threadfn)(void *data);
- int id, count, hlen;
+ int id = 0, count, hlen;
int result = -ENOMEM;
u16 mtu, min_mtu;
@@ -1792,6 +1771,18 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
sizeof(struct ip_vs_sync_conn_v0));
+ /* Do not hold one mutex and then to block on another */
+ for (;;) {
+ rtnl_lock();
+ if (mutex_trylock(&ipvs->sync_mutex))
+ break;
+ rtnl_unlock();
+ mutex_lock(&ipvs->sync_mutex);
+ if (rtnl_trylock())
+ break;
+ mutex_unlock(&ipvs->sync_mutex);
+ }
+
if (!ipvs->sync_state) {
count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
ipvs->threads_mask = count - 1;
@@ -1810,7 +1801,8 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
if (!dev) {
pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
- return -ENODEV;
+ result = -ENODEV;
+ goto out_early;
}
hlen = (AF_INET6 == c->mcast_af) ?
sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
@@ -1827,26 +1819,30 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
c->sync_maxlen = mtu - hlen;
if (state == IP_VS_STATE_MASTER) {
+ result = -EEXIST;
if (ipvs->ms)
- return -EEXIST;
+ goto out_early;
ipvs->mcfg = *c;
name = "ipvs-m:%d:%d";
threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) {
+ result = -EEXIST;
if (ipvs->backup_threads)
- return -EEXIST;
+ goto out_early;
ipvs->bcfg = *c;
name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup;
} else {
- return -EINVAL;
+ result = -EINVAL;
+ goto out_early;
}
if (state == IP_VS_STATE_MASTER) {
struct ipvs_master_sync_state *ms;
+ result = -ENOMEM;
ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
if (!ipvs->ms)
goto out;
@@ -1862,39 +1858,38 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
} else {
array = kcalloc(count, sizeof(struct task_struct *),
GFP_KERNEL);
+ result = -ENOMEM;
if (!array)
goto out;
}
- tinfo = NULL;
for (id = 0; id < count; id++) {
- if (state == IP_VS_STATE_MASTER)
- sock = make_send_sock(ipvs, id);
- else
- sock = make_receive_sock(ipvs, id, dev->ifindex);
- if (IS_ERR(sock)) {
- result = PTR_ERR(sock);
- goto outtinfo;
- }
+ result = -ENOMEM;
tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
- goto outsocket;
+ goto out;
tinfo->ipvs = ipvs;
- tinfo->sock = sock;
+ tinfo->sock = NULL;
if (state == IP_VS_STATE_BACKUP) {
tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
- goto outtinfo;
+ goto out;
} else {
tinfo->buf = NULL;
}
tinfo->id = id;
+ if (state == IP_VS_STATE_MASTER)
+ result = make_send_sock(ipvs, id, dev, &tinfo->sock);
+ else
+ result = make_receive_sock(ipvs, id, dev, &tinfo->sock);
+ if (result < 0)
+ goto out;
task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
if (IS_ERR(task)) {
result = PTR_ERR(task);
- goto outtinfo;
+ goto out;
}
tinfo = NULL;
if (state == IP_VS_STATE_MASTER)
@@ -1911,20 +1906,20 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
ipvs->sync_state |= state;
spin_unlock_bh(&ipvs->sync_buff_lock);
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+
/* increase the module use count */
ip_vs_use_count_inc();
return 0;
-outsocket:
- sock_release(sock);
-
-outtinfo:
- if (tinfo) {
- sock_release(tinfo->sock);
- kfree(tinfo->buf);
- kfree(tinfo);
- }
+out:
+ /* We do not need RTNL lock anymore, release it here so that
+ * sock_release below and in the kthreads can use rtnl_lock
+ * to leave the mcast group.
+ */
+ rtnl_unlock();
count = id;
while (count-- > 0) {
if (state == IP_VS_STATE_MASTER)
@@ -1932,13 +1927,23 @@ outtinfo:
else
kthread_stop(array[count]);
}
- kfree(array);
-
-out:
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
kfree(ipvs->ms);
ipvs->ms = NULL;
}
+ mutex_unlock(&ipvs->sync_mutex);
+ if (tinfo) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ kfree(tinfo);
+ }
+ kfree(array);
+ return result;
+
+out_early:
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
return result;
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 8ef21d9f9a00..4b2b3d53acfc 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -252,7 +252,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
- return a->master == b->master && a->class == b->class &&
+ return a->master == b->master &&
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
@@ -421,6 +421,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
+ if (i->class != expect->class)
+ return -EALREADY;
+
if (nf_ct_remove_expect(i))
break;
} else if (expect_clash(i, expect)) {
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 9fe0ddc333fb..277bbfe26478 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -9,6 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
+#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
@@ -71,6 +72,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
rcu_read_unlock();
alloc = max(newlen, NF_CT_EXT_PREALLOC);
+ kmemleak_not_leak(old);
new = __krealloc(old, alloc, gfp);
if (!new)
return NULL;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 4dbb5bad4363..908e51e2dc2b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -938,11 +938,19 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
datalen, rtp_exp, rtcp_exp,
mediaoff, medialen, daddr);
else {
- if (nf_ct_expect_related(rtp_exp) == 0) {
- if (nf_ct_expect_related(rtcp_exp) != 0)
- nf_ct_unexpect_related(rtp_exp);
- else
+ /* -EALREADY handling works around end-points that send
+ * SDP messages with identical port but different media type,
+ * we pretend expectation was set up.
+ */
+ int errp = nf_ct_expect_related(rtp_exp);
+
+ if (errp == 0 || errp == -EALREADY) {
+ int errcp = nf_ct_expect_related(rtcp_exp);
+
+ if (errcp == 0 || errcp == -EALREADY)
ret = NF_ACCEPT;
+ else if (errp == 0)
+ nf_ct_unexpect_related(rtp_exp);
}
}
nf_ct_expect_put(rtcp_exp);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9134cc429ad4..04d4e3772584 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2361,41 +2361,46 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (nft_is_active_next(net, old_rule)) {
- trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
- old_rule);
- if (trans == NULL) {
- err = -ENOMEM;
- goto err2;
- }
- nft_deactivate_next(net, old_rule);
- chain->use--;
- list_add_tail_rcu(&rule->list, &old_rule->list);
- } else {
+ if (!nft_is_active_next(net, old_rule)) {
err = -ENOENT;
goto err2;
}
- } else if (nlh->nlmsg_flags & NLM_F_APPEND)
- if (old_rule)
- list_add_rcu(&rule->list, &old_rule->list);
- else
- list_add_tail_rcu(&rule->list, &chain->rules);
- else {
- if (old_rule)
- list_add_tail_rcu(&rule->list, &old_rule->list);
- else
- list_add_rcu(&rule->list, &chain->rules);
- }
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
+ old_rule);
+ if (trans == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ nft_deactivate_next(net, old_rule);
+ chain->use--;
- if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
- err = -ENOMEM;
- goto err3;
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ } else {
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_APPEND) {
+ if (old_rule)
+ list_add_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_tail_rcu(&rule->list, &chain->rules);
+ } else {
+ if (old_rule)
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_rcu(&rule->list, &chain->rules);
+ }
}
chain->use++;
return 0;
-err3:
- list_del_rcu(&rule->list);
err2:
nf_tables_rule_destroy(&ctx, rule);
err1:
@@ -3207,18 +3212,20 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err = ops->init(set, &desc, nla);
if (err < 0)
- goto err2;
+ goto err3;
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
-err3:
+err4:
ops->destroy(set);
+err3:
+ kfree(set->name);
err2:
kvfree(set);
err1:
@@ -5738,7 +5745,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
struct nft_base_chain *basechain;
if (nft_trans_chain_name(trans))
- strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+ swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
return;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 773da82190dc..94df000abb92 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -36,11 +36,10 @@ MODULE_ALIAS("ipt_connmark");
MODULE_ALIAS("ip6t_connmark");
static unsigned int
-connmark_tg_shift(struct sk_buff *skb,
- const struct xt_connmark_tginfo1 *info,
- u8 shift_bits, u8 shift_dir)
+connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
{
enum ip_conntrack_info ctinfo;
+ u_int32_t new_targetmark;
struct nf_conn *ct;
u_int32_t newmark;
@@ -51,34 +50,39 @@ connmark_tg_shift(struct sk_buff *skb,
switch (info->mode) {
case XT_CONNMARK_SET:
newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
- if (shift_dir == D_SHIFT_RIGHT)
- newmark >>= shift_bits;
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ newmark >>= info->shift_bits;
else
- newmark <<= shift_bits;
+ newmark <<= info->shift_bits;
+
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_SAVE:
- newmark = (ct->mark & ~info->ctmask) ^
- (skb->mark & info->nfmask);
- if (shift_dir == D_SHIFT_RIGHT)
- newmark >>= shift_bits;
+ new_targetmark = (skb->mark & info->nfmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
else
- newmark <<= shift_bits;
+ new_targetmark <<= info->shift_bits;
+
+ newmark = (ct->mark & ~info->ctmask) ^
+ new_targetmark;
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_RESTORE:
- newmark = (skb->mark & ~info->nfmask) ^
- (ct->mark & info->ctmask);
- if (shift_dir == D_SHIFT_RIGHT)
- newmark >>= shift_bits;
+ new_targetmark = (ct->mark & info->ctmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
else
- newmark <<= shift_bits;
+ new_targetmark <<= info->shift_bits;
+
+ newmark = (skb->mark & ~info->nfmask) ^
+ new_targetmark;
skb->mark = newmark;
break;
}
@@ -89,8 +93,14 @@ static unsigned int
connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_connmark_tginfo1 *info = par->targinfo;
-
- return connmark_tg_shift(skb, info, 0, 0);
+ const struct xt_connmark_tginfo2 info2 = {
+ .ctmark = info->ctmark,
+ .ctmask = info->ctmask,
+ .nfmask = info->nfmask,
+ .mode = info->mode,
+ };
+
+ return connmark_tg_shift(skb, &info2);
}
static unsigned int
@@ -98,8 +108,7 @@ connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_connmark_tginfo2 *info = par->targinfo;
- return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info,
- info->shift_bits, info->shift_dir);
+ return connmark_tg_shift(skb, info);
}
static int connmark_tg_check(const struct xt_tgchk_param *par)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 55342c4d5cec..2e2dd88fc79f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2606,13 +2606,13 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
- "sk Eth Pid Groups "
- "Rmem Wmem Dump Locks Drops Inode\n");
+ "sk Eth Pid Groups "
+ "Rmem Wmem Dump Locks Drops Inode\n");
} else {
struct sock *s = v;
struct netlink_sock *nlk = nlk_sk(s);
- seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
+ seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8d %-8lu\n",
s,
s->sk_protocol,
nlk->portid,
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index d7da99a0b0b8..9696ef96b719 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -57,6 +57,8 @@ int nsh_pop(struct sk_buff *skb)
return -ENOMEM;
nh = (struct nshhdr *)(skb->data);
length = nsh_hdr_len(nh);
+ if (length < NSH_BASE_HDR_LEN)
+ return -EINVAL;
inner_proto = tun_p_to_eth_p(nh->np);
if (!pskb_may_pull(skb, length))
return -ENOMEM;
@@ -90,6 +92,8 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
goto out;
nsh_len = nsh_hdr_len(nsh_hdr(skb));
+ if (nsh_len < NSH_BASE_HDR_LEN)
+ goto out;
if (unlikely(!pskb_may_pull(skb, nsh_len)))
goto out;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7322aa1e382e..492ab0c36f7c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1712,13 +1712,10 @@ static void nlattr_set(struct nlattr *attr, u8 val,
/* The nlattr stream should already have been validated */
nla_for_each_nested(nla, attr, rem) {
- if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
- if (tbl[nla_type(nla)].next)
- tbl = tbl[nla_type(nla)].next;
- nlattr_set(nla, val, tbl);
- } else {
+ if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+ nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
+ else
memset(nla_data(nla), val, nla_len(nla));
- }
if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c31b0687396a..01f3515cada0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -329,11 +329,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
skb_set_queue_mapping(skb, queue_index);
}
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
*/
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
@@ -348,8 +348,13 @@ static void register_prot_hook(struct sock *sk)
}
}
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held. If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+ lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+ __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
* the po->bind_lock and do a synchronize_net to make sure no
* asynchronous packet processing paths still refer to the elements
* of po->prot_hook. If the sync parameter is false, it is the
@@ -359,6 +364,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
{
struct packet_sock *po = pkt_sk(sk);
+ lockdep_assert_held_once(&po->bind_lock);
+
po->running = 0;
if (po->fanout)
@@ -3252,7 +3259,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
if (proto) {
po->prot_hook.type = proto;
- register_prot_hook(sk);
+ __register_prot_hook(sk);
}
mutex_lock(&net->packet.sklist_lock);
@@ -3732,12 +3739,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_loss = !!val;
- return 0;
+
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_loss = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_AUXDATA:
{
@@ -3748,7 +3761,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
+ lock_sock(sk);
po->auxdata = !!val;
+ release_sock(sk);
return 0;
}
case PACKET_ORIGDEV:
@@ -3760,7 +3775,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
+ lock_sock(sk);
po->origdev = !!val;
+ release_sock(sk);
return 0;
}
case PACKET_VNET_HDR:
@@ -3769,15 +3786,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (sock->type != SOCK_RAW)
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (optlen < sizeof(val))
return -EINVAL;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->has_vnet_hdr = !!val;
- return 0;
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->has_vnet_hdr = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_TIMESTAMP:
{
@@ -3815,11 +3837,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_tx_has_off = !!val;
+
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_tx_has_off = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
return 0;
}
case PACKET_QDISC_BYPASS:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index a1d2b2319ae9..3bb7c5fb3bff 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -112,10 +112,12 @@ struct packet_sock {
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
- unsigned int running:1, /* prot_hook is attached*/
- auxdata:1,
+ unsigned int running; /* bind_lock must be held */
+ unsigned int auxdata:1, /* writer must hold sock lock */
origdev:1,
- has_vnet_hdr:1;
+ has_vnet_hdr:1,
+ tp_loss:1,
+ tp_tx_has_off:1;
int pressure;
int ifindex; /* bound device */
__be16 num;
@@ -125,8 +127,6 @@ struct packet_sock {
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
- unsigned int tp_loss:1;
- unsigned int tp_tx_has_off:1;
unsigned int tp_tstamp;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index eea1d8611b20..13b38ad0fa4a 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -547,7 +547,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
- return ret;
+ goto out;
sends_out:
vfree(ic->i_sends);
@@ -572,6 +572,7 @@ send_cq_out:
ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
+out:
rds_ib_dev_put(rds_ibdev);
return ret;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index de50e2126e40..dc67458b52f0 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -558,6 +558,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
struct rds_cmsg_rx_trace t;
int i, j;
+ memset(&t, 0, sizeof(t));
inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
t.rx_traces = rs->rs_rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf0512b..8527cfdc446d 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
}
}
- return 0;
+ return -ENOENT;
}
static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
@@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u16 mtype;
u16 dlen;
- curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL);
+ curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype,
+ &dlen, NULL);
+ if (!curr_data) {
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
+ return TC_ACT_SHOT;
+ }
if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a366e4c9413a..4808713c73b9 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -128,6 +128,28 @@ static bool fq_flow_is_detached(const struct fq_flow *f)
return f->next == &detached;
}
+static bool fq_flow_is_throttled(const struct fq_flow *f)
+{
+ return f->next == &throttled;
+}
+
+static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
+{
+ if (head->first)
+ head->last->next = flow;
+ else
+ head->first = flow;
+ head->last = flow;
+ flow->next = NULL;
+}
+
+static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+ rb_erase(&f->rate_node, &q->delayed);
+ q->throttled_flows--;
+ fq_flow_add_tail(&q->old_flows, f);
+}
+
static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
{
struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
@@ -155,15 +177,6 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
static struct kmem_cache *fq_flow_cachep __read_mostly;
-static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
-{
- if (head->first)
- head->last->next = flow;
- else
- head->first = flow;
- head->last = flow;
- flow->next = NULL;
-}
/* limit number of collected flows per round */
#define FQ_GC_MAX 8
@@ -267,6 +280,8 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ if (fq_flow_is_throttled(f))
+ fq_flow_unset_throttled(q, f);
f->time_next_packet = 0ULL;
}
return f;
@@ -438,9 +453,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
q->time_next_delayed_flow = f->time_next_packet;
break;
}
- rb_erase(p, &q->delayed);
- q->throttled_flows--;
- fq_flow_add_tail(&q->old_flows, f);
+ fq_flow_unset_throttled(q, f);
}
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 837806dd5799..a47179da24e6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1024,8 +1024,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
- int state;
+ int first_time = 1; /* is this the first time through the loop */
int error = 0;
+ int state;
/* The association should be held so we should be safe. */
ep = asoc->ep;
@@ -1036,6 +1037,30 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
state = asoc->state;
subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
+ /* If the first chunk in the packet is AUTH, do special
+ * processing specified in Section 6.3 of SCTP-AUTH spec
+ */
+ if (first_time && subtype.chunk == SCTP_CID_AUTH) {
+ struct sctp_chunkhdr *next_hdr;
+
+ next_hdr = sctp_inq_peek(inqueue);
+ if (!next_hdr)
+ goto normal;
+
+ /* If the next chunk is COOKIE-ECHO, skip the AUTH
+ * chunk while saving a pointer to it so we can do
+ * Authentication later (during cookie-echo
+ * processing).
+ */
+ if (next_hdr->type == SCTP_CID_COOKIE_ECHO) {
+ chunk->auth_chunk = skb_clone(chunk->skb,
+ GFP_ATOMIC);
+ chunk->auth = 1;
+ continue;
+ }
+ }
+
+normal:
/* SCTP-AUTH, Section 6.3:
* The receiver has a list of chunk types which it expects
* to be received only after an AUTH-chunk. This list has
@@ -1074,6 +1099,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
/* If there is an error on chunk, discard this packet. */
if (error && chunk)
chunk->pdiscard = 1;
+
+ if (first_time)
+ first_time = 0;
}
sctp_association_put(asoc);
}
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 23ebc5318edc..eb93ffe2408b 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -217,7 +217,7 @@ new_skb:
skb_pull(chunk->skb, sizeof(*ch));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
- if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
+ if (chunk->chunk_end + sizeof(*ch) <= skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2e3f7b75a8ec..42247110d842 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -895,6 +895,9 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2))
return 1;
+ if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET)
+ return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr;
+
return __sctp_v6_cmp_addr(addr1, addr2);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5a4fb1dc8400..e62addb60434 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1152,7 +1152,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
- static const char error[] = "Association exceeded its max_retans count";
+ static const char error[] = "Association exceeded its max_retrans count";
size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
struct sctp_chunk *retval;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index dd0594a10961..c9ae3404b1bb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -153,10 +153,7 @@ static enum sctp_disposition sctp_sf_violation_chunk(
struct sctp_cmd_seq *commands);
static enum sctp_ierror sctp_sf_authenticate(
- struct net *net,
- const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const union sctp_subtype type,
struct sctp_chunk *chunk);
static enum sctp_disposition __sctp_sf_do_9_1_abort(
@@ -626,6 +623,38 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk,
+ const struct sctp_association *asoc)
+{
+ struct sctp_chunk auth;
+
+ if (!chunk->auth_chunk)
+ return true;
+
+ /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo
+ * is supposed to be authenticated and we have to do delayed
+ * authentication. We've just recreated the association using
+ * the information in the cookie and now it's much easier to
+ * do the authentication.
+ */
+
+ /* Make sure that we and the peer are AUTH capable */
+ if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+ return false;
+
+ /* set-up our fake chunk so that we can process it */
+ auth.skb = chunk->auth_chunk;
+ auth.asoc = chunk->asoc;
+ auth.sctp_hdr = chunk->sctp_hdr;
+ auth.chunk_hdr = (struct sctp_chunkhdr *)
+ skb_push(chunk->auth_chunk,
+ sizeof(struct sctp_chunkhdr));
+ skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
+ auth.transport = chunk->transport;
+
+ return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR;
+}
+
/*
* Respond to a normal COOKIE ECHO chunk.
* We are the side that is being asked for an association.
@@ -763,37 +792,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
if (error)
goto nomem_init;
- /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo
- * is supposed to be authenticated and we have to do delayed
- * authentication. We've just recreated the association using
- * the information in the cookie and now it's much easier to
- * do the authentication.
- */
- if (chunk->auth_chunk) {
- struct sctp_chunk auth;
- enum sctp_ierror ret;
-
- /* Make sure that we and the peer are AUTH capable */
- if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
- sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- }
-
- /* set-up our fake chunk so that we can process it */
- auth.skb = chunk->auth_chunk;
- auth.asoc = chunk->asoc;
- auth.sctp_hdr = chunk->sctp_hdr;
- auth.chunk_hdr = (struct sctp_chunkhdr *)
- skb_push(chunk->auth_chunk,
- sizeof(struct sctp_chunkhdr));
- skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
- auth.transport = chunk->transport;
-
- ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
- if (ret != SCTP_IERROR_NO_ERROR) {
- sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- }
+ if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1794,13 +1795,18 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
GFP_ATOMIC))
goto nomem;
+ if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
+ goto nomem;
+
+ if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+ return SCTP_DISPOSITION_DISCARD;
+
/* Make sure no new addresses are being added during the
* restart. Though this is a pretty complicated attack
* since you'd have to get inside the cookie.
*/
- if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) {
+ if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands))
return SCTP_DISPOSITION_CONSUME;
- }
/* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes
* the peer has restarted (Action A), it MUST NOT setup a new
@@ -1906,6 +1912,12 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
GFP_ATOMIC))
goto nomem;
+ if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
+ goto nomem;
+
+ if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+ return SCTP_DISPOSITION_DISCARD;
+
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
@@ -2003,6 +2015,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
* a COOKIE ACK.
*/
+ if (!sctp_auth_chunk_verify(net, chunk, asoc))
+ return SCTP_DISPOSITION_DISCARD;
+
/* Don't accidentally move back into established state. */
if (asoc->state < SCTP_STATE_ESTABLISHED) {
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -2050,7 +2065,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
}
}
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
@@ -4165,10 +4180,7 @@ gen_shutdown:
* The return value is the disposition of the chunk.
*/
static enum sctp_ierror sctp_sf_authenticate(
- struct net *net,
- const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const union sctp_subtype type,
struct sctp_chunk *chunk)
{
struct sctp_shared_key *sh_key = NULL;
@@ -4269,7 +4281,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
commands);
auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
- error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
+ error = sctp_sf_authenticate(asoc, chunk);
switch (error) {
case SCTP_IERROR_AUTH_BAD_HMAC:
/* Generate the ERROR chunk and discard the rest
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index f799043abec9..f1f1d1b232ba 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -240,6 +240,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
new->out = NULL;
new->in = NULL;
+ new->outcnt = 0;
+ new->incnt = 0;
}
static int sctp_send_reconf(struct sctp_association *asoc,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5f8046c62d90..544bab42f925 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -292,6 +292,17 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
+/* register a new rmb */
+static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+ /* register memory region for new rmb */
+ if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
+ rmb_desc->regerr = 1;
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
@@ -321,9 +332,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
- rc = smc_wr_reg_send(link,
- smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc)
+ if (smc_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_INTERR;
/* send CONFIRM LINK response over RoCE fabric */
@@ -473,13 +482,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
goto decline_rdma_unlock;
}
} else {
- struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
-
- if (!buf_desc->reused) {
- /* register memory region for new rmb */
- rc = smc_wr_reg_send(link,
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc) {
+ if (!smc->conn.rmb_desc->reused) {
+ if (smc_reg_rmb(link, smc->conn.rmb_desc)) {
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
@@ -719,9 +723,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
link = &lgr->lnk[SMC_SINGLE_LINK];
- rc = smc_wr_reg_send(link,
- smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc)
+ if (smc_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_INTERR;
/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -854,13 +856,8 @@ static void smc_listen_work(struct work_struct *work)
smc_rx_init(new_smc);
if (local_contact != SMC_FIRST_CONTACT) {
- struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
-
- if (!buf_desc->reused) {
- /* register memory region for new rmb */
- rc = smc_wr_reg_send(link,
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- if (rc) {
+ if (!new_smc->conn.rmb_desc->reused) {
+ if (smc_reg_rmb(link, new_smc->conn.rmb_desc)) {
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
@@ -978,10 +975,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
}
out:
- if (lsmc->clcsock) {
- sock_release(lsmc->clcsock);
- lsmc->clcsock = NULL;
- }
release_sock(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
@@ -1170,13 +1163,15 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
/* delegate to CLC child sock */
release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
- /* if non-blocking connect finished ... */
lock_sock(sk);
- if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) {
- sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err) {
- mask |= EPOLLERR;
- } else {
+ sk->sk_err = smc->clcsock->sk->sk_err;
+ if (sk->sk_err) {
+ mask |= EPOLLERR;
+ } else {
+ /* if non-blocking connect finished ... */
+ if (sk->sk_state == SMC_INIT &&
+ mask & EPOLLOUT &&
+ smc->clcsock->sk->sk_state != TCP_CLOSE) {
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= EPOLLERR;
@@ -1259,14 +1254,12 @@ static int smc_shutdown(struct socket *sock, int how)
rc = smc_close_shutdown_write(smc);
break;
case SHUT_RD:
- if (sk->sk_state == SMC_LISTEN)
- rc = smc_close_active(smc);
- else
- rc = 0;
- /* nothing more to do because peer is not involved */
+ rc = 0;
+ /* nothing more to do because peer is not involved */
break;
}
- rc1 = kernel_sock_shutdown(smc->clcsock, how);
+ if (smc->clcsock)
+ rc1 = kernel_sock_shutdown(smc->clcsock, how);
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
@@ -1322,8 +1315,11 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
smc = smc_sk(sk);
lock_sock(sk);
- if (sk->sk_state != SMC_ACTIVE)
+ if (sk->sk_state != SMC_ACTIVE) {
+ release_sock(sk);
goto out;
+ }
+ release_sock(sk);
if (smc->use_fallback)
rc = kernel_sendpage(smc->clcsock, page, offset,
size, flags);
@@ -1331,7 +1327,6 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
rc = sock_no_sendpage(sock, page, offset, size, flags);
out:
- release_sock(sk);
return rc;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f44f6803f7ff..d4bd01bb44e1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,9 @@
static u32 smc_lgr_num; /* unique link group number */
+static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
+ bool is_rmb);
+
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
@@ -234,9 +237,22 @@ static void smc_buf_unuse(struct smc_connection *conn)
conn->sndbuf_size = 0;
}
if (conn->rmb_desc) {
- conn->rmb_desc->reused = true;
- conn->rmb_desc->used = 0;
- conn->rmbe_size = 0;
+ if (!conn->rmb_desc->regerr) {
+ conn->rmb_desc->reused = 1;
+ conn->rmb_desc->used = 0;
+ conn->rmbe_size = 0;
+ } else {
+ /* buf registration failed, reuse not possible */
+ struct smc_link_group *lgr = conn->lgr;
+ struct smc_link *lnk;
+
+ write_lock_bh(&lgr->rmbs_lock);
+ list_del(&conn->rmb_desc->list);
+ write_unlock_bh(&lgr->rmbs_lock);
+
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ smc_buf_free(conn->rmb_desc, lnk, true);
+ }
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 07e2a393e6d9..5dfcb15d529f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -123,7 +123,8 @@ struct smc_buf_desc {
*/
u32 order; /* allocation order */
u32 used; /* currently used / unused */
- bool reused; /* new created / reused */
+ u8 reused : 1; /* new created / reused */
+ u8 regerr : 1; /* err during registration */
};
struct smc_rtoken { /* address/key of remote RMB */
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 805b139756db..092bebc70048 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
static void strp_start_timer(struct strparser *strp, long timeo)
{
- if (timeo)
+ if (timeo && timeo != LONG_MAX)
mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 806395687bb6..c2266f387213 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1887,7 +1887,7 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
- trace_rpc_connect_status(task, status);
+ trace_rpc_connect_status(task);
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
@@ -2014,6 +2014,9 @@ call_transmit_status(struct rpc_task *task)
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
xprt_end_transmit(task);
+ if (!task->tk_msg.rpc_proc->p_proc)
+ trace_xprt_ping(task->tk_xprt,
+ task->tk_status);
rpc_exit(task, task->tk_status);
break;
}
@@ -2112,6 +2115,9 @@ call_status(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
int status;
+ if (!task->tk_msg.rpc_proc->p_proc)
+ trace_xprt_ping(task->tk_xprt, task->tk_status);
+
if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
task->tk_status = req->rq_reply_bytes_recvd;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0f08934b2cea..c81ef5e6c981 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1375,6 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
struct dentry *clnt_dir = pipe_dentry->d_parent;
struct dentry *gssd_dir = clnt_dir->d_parent;
+ dget(pipe_dentry);
__rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
__rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
__rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d9db2eab3a8d..3fe5d60ab0e2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,7 +276,7 @@ static void rpc_set_active(struct rpc_task *task)
{
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
- trace_rpc_task_begin(task->tk_client, task, NULL);
+ trace_rpc_task_begin(task, NULL);
}
/*
@@ -291,7 +291,7 @@ static int rpc_complete_task(struct rpc_task *task)
unsigned long flags;
int ret;
- trace_rpc_task_complete(task->tk_client, task, NULL);
+ trace_rpc_task_complete(task, NULL);
spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
@@ -358,7 +358,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
- trace_rpc_task_sleep(task->tk_client, task, q);
+ trace_rpc_task_sleep(task, q);
__rpc_add_wait_queue(q, task, queue_priority);
@@ -428,7 +428,7 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
return;
}
- trace_rpc_task_wakeup(task->tk_client, task, queue);
+ trace_rpc_task_wakeup(task, queue);
__rpc_remove_wait_queue(queue, task);
@@ -780,7 +780,7 @@ static void __rpc_execute(struct rpc_task *task)
}
if (!do_action)
break;
- trace_rpc_task_run_action(task->tk_client, task, do_action);
+ trace_rpc_task_run_action(task, do_action);
do_action(task);
/*
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 1e671333c3d5..f68aa46c9dd7 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -24,6 +24,8 @@
#include <linux/sunrpc/metrics.h>
#include <linux/rcupdate.h>
+#include <trace/events/sunrpc.h>
+
#include "netns.h"
#define RPCDBG_FACILITY RPCDBG_MISC
@@ -148,7 +150,7 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
struct rpc_iostats *op_metrics)
{
struct rpc_rqst *req = task->tk_rqstp;
- ktime_t delta, now;
+ ktime_t backlog, execute, now;
if (!op_metrics || !req)
return;
@@ -164,16 +166,20 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
+ backlog = 0;
if (ktime_to_ns(req->rq_xtime)) {
- delta = ktime_sub(req->rq_xtime, task->tk_start);
- op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
+ backlog = ktime_sub(req->rq_xtime, task->tk_start);
+ op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog);
}
+
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
- delta = ktime_sub(now, task->tk_start);
- op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+ execute = ktime_sub(now, task->tk_start);
+ op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
spin_unlock(&op_metrics->om_lock);
+
+ trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute);
}
EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics);
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index f2b7cb540e61..09a0315ea77b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -37,12 +37,6 @@ struct rpc_buffer {
char data[];
};
-static inline int rpc_reply_expected(struct rpc_task *task)
-{
- return (task->tk_msg.rpc_proc != NULL) &&
- (task->tk_msg.rpc_proc->p_decode != NULL);
-}
-
static inline int sock_is_loopback(struct sock *sk)
{
struct dst_entry *dst;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index e34f4ee7f2b6..30afbd236656 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1519,6 +1519,88 @@ out:
EXPORT_SYMBOL_GPL(xdr_process_buf);
/**
+ * xdr_stream_decode_opaque - Decode variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store opaque data
+ * @size: size of storage buffer @ptr
+ *
+ * Return values:
+ * On success, returns size of object stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE on overflow of storage buffer @ptr
+ */
+ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
+ if (ret <= 0)
+ return ret;
+ memcpy(ptr, p, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque);
+
+/**
+ * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store pointer to opaque data
+ * @maxlen: maximum acceptable object size
+ * @gfp_flags: GFP mask to use
+ *
+ * Return values:
+ * On success, returns size of object stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE if the size of the object would exceed @maxlen
+ * %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
+ size_t maxlen, gfp_t gfp_flags)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
+ if (ret > 0) {
+ *ptr = kmemdup(p, ret, gfp_flags);
+ if (*ptr != NULL)
+ return ret;
+ ret = -ENOMEM;
+ }
+ *ptr = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup);
+
+/**
+ * xdr_stream_decode_string - Decode variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store string
+ * @size: size of storage buffer @str
+ *
+ * Return values:
+ * On success, returns length of NUL-terminated string stored in *@str
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE on overflow of storage buffer @str
+ */
+ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
+ if (ret > 0) {
+ memcpy(str, p, ret);
+ str[ret] = '\0';
+ return strlen(str);
+ }
+ *str = '\0';
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string);
+
+/**
* xdr_stream_decode_string_dup - Decode and duplicate variable length string
* @xdr: pointer to xdr_stream
* @str: location to store pointer to string
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8f0ad4f268da..70f005044f06 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,6 +826,7 @@ static void xprt_connect_status(struct rpc_task *task)
* @xprt: transport on which the original request was transmitted
* @xid: RPC XID of incoming reply
*
+ * Caller holds xprt->recv_lock.
*/
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
@@ -834,6 +835,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
list_for_each_entry(entry, &xprt->recv, rq_list)
if (entry->rq_xid == xid) {
trace_xprt_lookup_rqst(xprt, xid, 0);
+ entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
return entry;
}
@@ -889,7 +891,13 @@ __must_hold(&req->rq_xprt->recv_lock)
}
}
-static void xprt_update_rtt(struct rpc_task *task)
+/**
+ * xprt_update_rtt - Update RPC RTT statistics
+ * @task: RPC request that recently completed
+ *
+ * Caller holds xprt->recv_lock.
+ */
+void xprt_update_rtt(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_rtt *rtt = task->tk_client->cl_rtt;
@@ -902,13 +910,14 @@ static void xprt_update_rtt(struct rpc_task *task)
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
}
}
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
/**
* xprt_complete_rqst - called when reply processing is complete
* @task: RPC request that recently completed
* @copied: actual number of bytes received from the transport
*
- * Caller holds transport lock.
+ * Caller holds xprt->recv_lock.
*/
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
@@ -920,9 +929,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
xprt->stat.recvs++;
- req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
- if (xprt->ops->timer != NULL)
- xprt_update_rtt(task);
list_del_init(&req->rq_list);
req->rq_private_buf.len = copied;
@@ -1003,7 +1009,7 @@ void xprt_transmit(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
unsigned int connect_cookie;
- int status, numreqs;
+ int status;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1027,7 +1033,6 @@ void xprt_transmit(struct rpc_task *task)
return;
connect_cookie = xprt->connect_cookie;
- req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
@@ -1042,9 +1047,6 @@ void xprt_transmit(struct rpc_task *task)
xprt->ops->set_retrans_timeout(task);
- numreqs = atomic_read(&xprt->num_reqs);
- if (numreqs > xprt->stat.max_slots)
- xprt->stat.max_slots = numreqs;
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1106,14 +1108,15 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
- if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+ if (xprt->num_reqs >= xprt->max_reqs)
goto out;
+ ++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
- atomic_dec(&xprt->num_reqs);
+ --xprt->num_reqs;
req = ERR_PTR(-ENOMEM);
out:
return req;
@@ -1121,7 +1124,8 @@ out:
static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+ if (xprt->num_reqs > xprt->min_reqs) {
+ --xprt->num_reqs;
kfree(req);
return true;
}
@@ -1157,6 +1161,8 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
spin_unlock(&xprt->reserve_lock);
return;
out_init_req:
+ xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
+ xprt->num_reqs);
task->tk_status = 0;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
@@ -1224,7 +1230,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
else
xprt->max_reqs = num_prealloc;
xprt->min_reqs = num_prealloc;
- atomic_set(&xprt->num_reqs, num_prealloc);
+ xprt->num_reqs = num_prealloc;
return xprt;
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ed1a4a3065ee..47ebac949769 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -44,13 +44,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
if (IS_ERR(req))
return PTR_ERR(req);
- rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
- DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb))
- goto out_fail;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
-
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index d5f95bb39300..5cc68a824f45 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -191,7 +191,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
- return ERR_PTR(-ENOBUFS);
+ return ERR_PTR(-EAGAIN);
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -251,6 +251,16 @@ out_maperr:
return ERR_PTR(-EIO);
}
+/* Post Send WR containing the RPC Call message.
+ */
+static int
+fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *bad_wr;
+
+ return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
@@ -305,6 +315,7 @@ out_reset:
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
+ .ro_send = fmr_op_send,
.ro_unmap_sync = fmr_op_unmap_sync,
.ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 90f688f19783..c5743a0960be 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -357,8 +357,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mr *mr;
struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
- struct ib_send_wr *bad_wr;
- int rc, i, n;
+ int i, n;
u8 key;
mr = NULL;
@@ -367,7 +366,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_mr_defer_recovery(mr);
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
- return ERR_PTR(-ENOBUFS);
+ return ERR_PTR(-EAGAIN);
} while (mr->frwr.fr_state != FRWR_IS_INVALID);
frwr = &mr->frwr;
frwr->fr_state = FRWR_IS_VALID;
@@ -407,22 +406,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
ib_update_fast_reg_key(ibmr, ++key);
reg_wr = &frwr->fr_regwr;
- reg_wr->wr.next = NULL;
- reg_wr->wr.opcode = IB_WR_REG_MR;
- frwr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frwr->fr_cqe;
- reg_wr->wr.num_sge = 0;
- reg_wr->wr.send_flags = 0;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
- if (rc)
- goto out_senderr;
-
mr->mr_handle = ibmr->rkey;
mr->mr_length = ibmr->length;
mr->mr_offset = ibmr->iova;
@@ -442,11 +431,40 @@ out_mapmr_err:
frwr->fr_mr, n, mr->mr_nents);
rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
+}
-out_senderr:
- pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
- rpcrdma_mr_defer_recovery(mr);
- return ERR_PTR(-ENOTCONN);
+/* Post Send WR containing the RPC Call message.
+ *
+ * For FRMR, chain any FastReg WRs to the Send WR. Only a
+ * single ib_post_send call is needed to register memory
+ * and then post the Send WR.
+ */
+static int
+frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *post_wr, *bad_wr;
+ struct rpcrdma_mr *mr;
+
+ post_wr = &req->rl_sendctx->sc_wr;
+ list_for_each_entry(mr, &req->rl_registered, mr_list) {
+ struct rpcrdma_frwr *frwr;
+
+ frwr = &mr->frwr;
+
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ frwr->fr_regwr.wr.next = post_wr;
+ frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
+ frwr->fr_regwr.wr.num_sge = 0;
+ frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
+ frwr->fr_regwr.wr.send_flags = 0;
+
+ post_wr = &frwr->fr_regwr.wr;
+ }
+
+ /* If ib_post_send fails, the next ->send_request for
+ * @req will queue these MWs for recovery.
+ */
+ return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
}
/* Handle a remotely invalidated mr on the @mrs list
@@ -561,6 +579,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_send = frwr_op_send,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f0855a959a27..e8adad33d0bb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,6 +377,11 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} while (nsegs);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -423,7 +428,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -440,6 +445,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -481,7 +491,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -498,6 +508,11 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/**
@@ -724,8 +739,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
* Returns:
* %0 if the RPC was sent successfully,
* %-ENOTCONN if the connection was lost,
- * %-EAGAIN if not enough pages are available for on-demand reply buffer,
- * %-ENOBUFS if no MRs are available to register chunks,
+ * %-EAGAIN if the caller should call again with the same arguments,
+ * %-ENOBUFS if the caller should call again after a delay,
* %-EMSGSIZE if the transport header is too small,
* %-EIO if a permanent problem occurred while marshaling.
*/
@@ -868,10 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
- if (ret != -ENOBUFS) {
- pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
- r_xprt->rx_stats.failed_marshal_count++;
- }
+ r_xprt->rx_stats.failed_marshal_count++;
return ret;
}
@@ -1366,7 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 4b1ecfe979cf..cc1aad325496 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,7 +52,6 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
-#include <linux/smp.h>
#include "xprt_rdma.h"
@@ -237,8 +236,6 @@ rpcrdma_connect_worker(struct work_struct *work)
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
spin_lock_bh(&xprt->transport_lock);
- if (++xprt->connect_cookie == 0) /* maintain a reserved value */
- ++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
@@ -540,29 +537,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-/* Allocate a fixed-size buffer in which to construct and send the
- * RPC-over-RDMA header for this request.
- */
-static bool
-rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- gfp_t flags)
-{
- size_t size = RPCRDMA_HDRBUF_SIZE;
- struct rpcrdma_regbuf *rb;
-
- if (req->rl_rdmabuf)
- return true;
-
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
- if (IS_ERR(rb))
- return false;
-
- r_xprt->rx_stats.hardway_register_count += size;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
- return true;
-}
-
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
@@ -644,15 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task)
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
- if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
- goto out_fail;
if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
goto out_fail;
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- req->rl_cpu = smp_processor_id();
- req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
@@ -694,7 +664,8 @@ xprt_rdma_free(struct rpc_task *task)
* Returns:
* %0 if the RPC message has been sent
* %-ENOTCONN if the caller should reconnect and call again
- * %-ENOBUFS if the caller should call again later
+ * %-EAGAIN if the caller should call again
+ * %-ENOBUFS if the caller should call again after a delay
* %-EIO if a permanent error occurred and the request was not
* sent. Do not try to send this message again.
*/
@@ -723,9 +694,9 @@ xprt_rdma_send_request(struct rpc_task *task)
rpcrdma_recv_buffer_get(req);
/* Must suppress retransmit to maintain credits */
- if (req->rl_connect_cookie == xprt->connect_cookie)
+ if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection;
- req->rl_connect_cookie = xprt->connect_cookie;
+ rqst->rq_xtime = ktime_get();
__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
@@ -733,6 +704,12 @@ xprt_rdma_send_request(struct rpc_task *task)
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
+
+ /* An RPC with no reply will throw off credit accounting,
+ * so drop the connection to reset the credit grant.
+ */
+ if (!rpc_reply_expected(task))
+ goto drop_connection;
return 0;
failed_marshal:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e6f84a6434a0..fe5eaca2d197 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -250,11 +250,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
- ia->ri_pd = NULL;
ia->ri_device = NULL;
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
+ ++xprt->rx_xprt.connect_cookie;
connstate = 1;
rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
@@ -273,6 +273,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -EAGAIN;
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
+ ++xprt->rx_xprt.connect_cookie;
connstate = -ECONNABORTED;
connected:
xprt->rx_buf.rb_credits = 1;
@@ -445,7 +446,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
ia->ri_id->qp = NULL;
}
ib_free_cq(ep->rep_attr.recv_cq);
+ ep->rep_attr.recv_cq = NULL;
ib_free_cq(ep->rep_attr.send_cq);
+ ep->rep_attr.send_cq = NULL;
/* The ULP is responsible for ensuring all DMA
* mappings and MRs are gone.
@@ -458,6 +461,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
rpcrdma_mrs_destroy(buf);
+ ib_dealloc_pd(ia->ri_pd);
+ ia->ri_pd = NULL;
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
@@ -589,11 +594,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
- ep->rep_remote_cma.responder_resources = 32;
- else
- ep->rep_remote_cma.responder_resources =
- ia->ri_device->attrs.max_qp_rd_atom;
+ ep->rep_remote_cma.responder_resources =
+ min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
/* Limit transport retries so client can detect server
* GID changes quickly. RPC layer handles re-establishing
@@ -628,14 +630,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
cancel_delayed_work_sync(&ep->rep_connect_worker);
- if (ia->ri_id->qp) {
+ if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
- ib_free_cq(ep->rep_attr.recv_cq);
- ib_free_cq(ep->rep_attr.send_cq);
+ if (ep->rep_attr.recv_cq)
+ ib_free_cq(ep->rep_attr.recv_cq);
+ if (ep->rep_attr.send_cq)
+ ib_free_cq(ep->rep_attr.send_cq);
}
/* Re-establish a connection after a device removal event.
@@ -1024,7 +1028,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(free);
LIST_HEAD(all);
- for (count = 0; count < 32; count++) {
+ for (count = 0; count < 3; count++) {
struct rpcrdma_mr *mr;
int rc;
@@ -1049,8 +1053,9 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
spin_unlock(&buf->rb_mrlock);
-
trace_xprtrdma_createmrs(r_xprt, count);
+
+ xprt_write_space(&r_xprt->rx_xprt);
}
static void
@@ -1068,17 +1073,27 @@ struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
+ struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
return ERR_PTR(-ENOMEM);
+ rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
+ DMA_TO_DEVICE, GFP_KERNEL);
+ if (IS_ERR(rb)) {
+ kfree(req);
+ return ERR_PTR(-ENOMEM);
+ }
+ req->rl_rdmabuf = rb;
+ xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
+ req->rl_buffer = buffer;
+ INIT_LIST_HEAD(&req->rl_registered);
+
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
- req->rl_buffer = &r_xprt->rx_buf;
- INIT_LIST_HEAD(&req->rl_registered);
return req;
}
@@ -1535,7 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_req *req)
{
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
- struct ib_send_wr *send_wr_fail;
int rc;
if (req->rl_reply) {
@@ -1554,7 +1568,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
--ep->rep_send_count;
}
- rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+ rc = ia->ri_ops->ro_send(ia, req);
trace_xprtrdma_post_send(req, rc);
if (rc)
return -ENOTCONN;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 69883a960a3f..3d3b423fa9c1 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -334,8 +334,6 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- int rl_cpu;
- unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
@@ -474,6 +472,8 @@ struct rpcrdma_memreg_ops {
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mr **);
+ int (*ro_send)(struct rpcrdma_ia *ia,
+ struct rpcrdma_req *req);
void (*ro_reminv)(struct rpcrdma_rep *rep,
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956e29c1438d..c8902f11efdd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -527,6 +527,7 @@ static int xs_local_send_request(struct rpc_task *task)
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
+ req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
true, &sent);
dprintk("RPC: %s(%u) = %d\n",
@@ -589,6 +590,7 @@ static int xs_udp_send_request(struct rpc_task *task)
if (!xprt_bound(xprt))
return -ENOTCONN;
+ req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
xdr, req->rq_bytes_sent, true, &sent);
@@ -678,6 +680,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
+ req->rq_xtime = ktime_get();
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
@@ -1060,6 +1063,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
if (!rovr)
goto out_unlock;
xprt_pin_rqst(rovr);
+ xprt_update_rtt(rovr->rq_task);
spin_unlock(&xprt->recv_lock);
task = rovr->rq_task;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6f98b56dd48e..f29549de9245 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1950,6 +1950,7 @@ out:
int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
+ struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct tipc_nl_msg msg;
char *name;
int err;
@@ -1957,9 +1958,19 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
msg.portid = info->snd_portid;
msg.seq = info->snd_seq;
- if (!info->attrs[TIPC_NLA_LINK_NAME])
+ if (!info->attrs[TIPC_NLA_LINK])
return -EINVAL;
- name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]);
+
+ err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
+ info->attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
+
+ name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!msg.skb)
@@ -2244,7 +2255,7 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) {
- err = __tipc_nl_add_monitor(net, &msg, prev_bearer);
+ err = __tipc_nl_add_monitor(net, &msg, bearer_id);
if (err)
break;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 252a52ae0893..6be21575503a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1516,10 +1516,10 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
srcaddr->sock.family = AF_TIPC;
srcaddr->sock.addrtype = TIPC_ADDR_ID;
+ srcaddr->sock.scope = 0;
srcaddr->sock.addr.id.ref = msg_origport(hdr);
srcaddr->sock.addr.id.node = msg_orignode(hdr);
srcaddr->sock.addr.name.domain = 0;
- srcaddr->sock.scope = 0;
m->msg_namelen = sizeof(struct sockaddr_tipc);
if (!msg_in_group(hdr))
@@ -1528,6 +1528,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
/* Group message users may also want to know sending member's id */
srcaddr->member.family = AF_TIPC;
srcaddr->member.addrtype = TIPC_ADDR_NAME;
+ srcaddr->member.scope = 0;
srcaddr->member.addr.name.name.type = msg_nametype(hdr);
srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
srcaddr->member.addr.name.domain = 0;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 0d379970960e..20cd93be6236 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -114,6 +114,7 @@ int tls_push_sg(struct sock *sk,
size = sg->length - offset;
offset += sg->offset;
+ ctx->in_tcp_sendpages = true;
while (1) {
if (sg_is_last(sg))
sendpage_flags = flags;
@@ -134,6 +135,7 @@ retry:
offset -= sg->offset;
ctx->partially_sent_offset = offset;
ctx->partially_sent_record = (void *)sg;
+ ctx->in_tcp_sendpages = false;
return ret;
}
@@ -148,6 +150,8 @@ retry:
}
clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+ ctx->in_tcp_sendpages = false;
+ ctx->sk_write_space(sk);
return 0;
}
@@ -217,6 +221,10 @@ static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
+ /* We are already sending pages, ignore notification */
+ if (ctx->in_tcp_sendpages)
+ return;
+
if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
gfp_t sk_allocation = sk->sk_allocation;
int rc;
@@ -241,16 +249,13 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
struct tls_context *ctx = tls_get_ctx(sk);
long timeo = sock_sndtimeo(sk, 0);
void (*sk_proto_close)(struct sock *sk, long timeout);
+ bool free_ctx = false;
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
- if (ctx->conf == TLS_HW_RECORD)
- goto skip_tx_cleanup;
-
- if (ctx->conf == TLS_BASE) {
- kfree(ctx);
- ctx = NULL;
+ if (ctx->conf == TLS_BASE || ctx->conf == TLS_HW_RECORD) {
+ free_ctx = true;
goto skip_tx_cleanup;
}
@@ -287,7 +292,7 @@ skip_tx_cleanup:
/* free ctx for TLS_HW_RECORD, used by tcp_set_state
* for sk->sk_prot->unhash [tls_hw_unhash]
*/
- if (ctx && ctx->conf == TLS_HW_RECORD)
+ if (free_ctx)
kfree(ctx);
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f9d2f2233f09..6c177ae7a6d9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2175,6 +2175,12 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
return afinfo;
}
+void xfrm_flush_gc(void)
+{
+ flush_work(&xfrm_state_gc_work);
+}
+EXPORT_SYMBOL(xfrm_flush_gc);
+
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{