summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c219
-rw-r--r--net/core/devlink.c862
-rw-r--r--net/core/drop_monitor.c5
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/fib_rules.c30
-rw-r--r--net/core/filter.c167
-rw-r--r--net/core/flow.c29
-rw-r--r--net/core/flow_dissector.c445
-rw-r--r--net/core/gro_cells.c2
-rw-r--r--net/core/lwt_bpf.c5
-rw-r--r--net/core/lwtunnel.c9
-rw-r--r--net/core/neighbour.c49
-rw-r--r--net/core/net_namespace.c13
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c156
-rw-r--r--net/core/secure_seq.c13
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c154
-rw-r--r--net/core/sock_diag.c15
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/core/sysctl_net_core.c8
-rw-r--r--net/core/utils.c2
23 files changed, 1735 insertions, 467 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index d797baa69e43..db1866f2ffcf 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
- } while (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, flags & MSG_DONTWAIT));
+
+ if (!sk_can_busy_loop(sk))
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+ } while (!skb_queue_empty(&sk->sk_receive_queue));
error = -EAGAIN;
diff --git a/net/core/dev.c b/net/core/dev.c
index c57878be09d2..d07aa5ffb511 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -95,6 +95,7 @@
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -2975,6 +2976,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
__skb_linearize(skb))
goto out_kfree_skb;
+ if (validate_xmit_xfrm(skb, features))
+ goto out_kfree_skb;
+
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
@@ -3444,6 +3448,7 @@ EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
+unsigned int __read_mostly netdev_budget_usecs = 2000;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -4250,6 +4255,125 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ struct bpf_prog *new = xdp->prog;
+ int ret = 0;
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG: {
+ struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
+
+ rcu_assign_pointer(dev->xdp_prog, new);
+ if (old)
+ bpf_prog_put(old);
+
+ if (old && !new) {
+ static_key_slow_dec(&generic_xdp_needed);
+ } else if (new && !old) {
+ static_key_slow_inc(&generic_xdp_needed);
+ dev_disable_lro(dev);
+ }
+ break;
+ }
+
+ case XDP_QUERY_PROG:
+ xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct xdp_buff xdp;
+ u32 act = XDP_DROP;
+ void *orig_data;
+ int hlen, off;
+ u32 mac_len;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_cloned(skb))
+ return XDP_PASS;
+
+ if (skb_linearize(skb))
+ goto do_drop;
+
+ /* The XDP program wants to see the packet starting at the MAC
+ * header.
+ */
+ mac_len = skb->data - skb_mac_header(skb);
+ hlen = skb_headlen(skb) + mac_len;
+ xdp.data = skb->data - mac_len;
+ xdp.data_end = xdp.data + hlen;
+ xdp.data_hard_start = skb->data - skb_headroom(skb);
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ off = xdp.data - orig_data;
+ if (off > 0)
+ __skb_pull(skb, off);
+ else if (off < 0)
+ __skb_push(skb, -off);
+
+ switch (act) {
+ case XDP_TX:
+ __skb_push(skb, mac_len);
+ /* fall through */
+ case XDP_PASS:
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ do_drop:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
+ bool free_skb = true;
+ int cpu, rc;
+
+ txq = netdev_pick_tx(dev, skb, NULL);
+ cpu = smp_processor_id();
+ HARD_TX_LOCK(dev, txq, cpu);
+ if (!netif_xmit_stopped(txq)) {
+ rc = netdev_start_xmit(skb, dev, txq, 0);
+ if (dev_xmit_complete(rc))
+ free_skb = false;
+ }
+ HARD_TX_UNLOCK(dev, txq);
+ if (free_skb) {
+ trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ kfree_skb(skb);
+ }
+}
+
static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
@@ -4261,6 +4385,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
rcu_read_lock();
+ if (static_key_false(&generic_xdp_needed)) {
+ struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+ if (xdp_prog) {
+ u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+
+ if (act != XDP_PASS) {
+ rcu_read_unlock();
+ if (act == XDP_TX)
+ generic_xdp_tx(skb, xdp_prog);
+ return NET_RX_DROP;
+ }
+ }
+ }
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4493,7 +4632,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
enum gro_result ret;
int grow;
- if (!(skb->dev->features & NETIF_F_GRO))
+ if (netif_elide_gro(skb->dev))
goto normal;
if (skb->csum_bad)
@@ -5063,27 +5202,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
do_softirq();
}
-bool sk_busy_loop(struct sock *sk, int nonblock)
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg)
{
- unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
- int rc;
restart:
- rc = false;
napi_poll = NULL;
rcu_read_lock();
- napi = napi_by_id(sk->sk_napi_id);
+ napi = napi_by_id(napi_id);
if (!napi)
goto out;
preempt_disable();
for (;;) {
- rc = 0;
+ int work = 0;
+
local_bh_disable();
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -5101,16 +5241,15 @@ restart:
have_poll_lock = netpoll_poll_lock(napi);
napi_poll = napi->poll;
}
- rc = napi_poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+ work = napi_poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
count:
- if (rc > 0)
- __NET_ADD_STATS(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ if (work > 0)
+ __NET_ADD_STATS(dev_net(napi->dev),
+ LINUX_MIB_BUSYPOLLRXPACKETS, work);
local_bh_enable();
- if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
- busy_loop_timeout(end_time))
+ if (!loop_end || loop_end(loop_end_arg, start_time))
break;
if (unlikely(need_resched())) {
@@ -5119,9 +5258,8 @@ count:
preempt_enable();
rcu_read_unlock();
cond_resched();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
- if (rc || busy_loop_timeout(end_time))
- return rc;
+ if (loop_end(loop_end_arg, start_time))
+ return;
goto restart;
}
cpu_relax();
@@ -5129,12 +5267,10 @@ count:
if (napi_poll)
busy_poll_stop(napi, have_poll_lock);
preempt_enable();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock();
- return rc;
}
-EXPORT_SYMBOL(sk_busy_loop);
+EXPORT_SYMBOL(napi_busy_loop);
#endif /* CONFIG_NET_RX_BUSY_POLL */
@@ -5146,10 +5282,10 @@ static void napi_hash_add(struct napi_struct *napi)
spin_lock(&napi_hash_lock);
- /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ /* 0..NR_CPUS range is reserved for sender_cpu use */
do {
- if (unlikely(++napi_gen_id < NR_CPUS + 1))
- napi_gen_id = NR_CPUS + 1;
+ if (unlikely(++napi_gen_id < MIN_NAPI_ID))
+ napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
napi->napi_id = napi_gen_id;
@@ -5313,7 +5449,8 @@ out_unlock:
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies +
+ usecs_to_jiffies(netdev_budget_usecs);
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -6717,13 +6854,16 @@ EXPORT_SYMBOL(dev_change_proto_down);
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
+ * @extack: netlink extended ack
* @fd: new program fd or negative value to clear
* @flags: xdp-related flags
*
* Set or clear a bpf program for a device
*/
-int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, u32 flags)
{
+ int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
struct netdev_xdp xdp;
@@ -6731,14 +6871,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
ASSERT_RTNL();
- if (!ops->ndo_xdp)
- return -EOPNOTSUPP;
+ xdp_op = ops->ndo_xdp;
+ if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
+ xdp_op = generic_xdp_install;
+
if (fd >= 0) {
if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_QUERY_PROG;
- err = ops->ndo_xdp(dev, &xdp);
+ err = xdp_op(dev, &xdp);
if (err < 0)
return err;
if (xdp.prog_attached)
@@ -6752,9 +6894,10 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_SETUP_PROG;
+ xdp.extack = extack;
xdp.prog = prog;
- err = ops->ndo_xdp(dev, &xdp);
+ err = xdp_op(dev, &xdp);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7105,13 +7248,10 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
else
netif_dormant_off(dev);
- if (netif_carrier_ok(rootdev)) {
- if (!netif_carrier_ok(dev))
- netif_carrier_on(dev);
- } else {
- if (netif_carrier_ok(dev))
- netif_carrier_off(dev);
- }
+ if (netif_carrier_ok(rootdev))
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
@@ -7794,6 +7934,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
+ struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
@@ -7812,6 +7953,12 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ prog = rcu_dereference_protected(dev->xdp_prog, 1);
+ if (prog) {
+ bpf_prog_put(prog);
+ static_key_slow_dec(&generic_xdp_needed);
+ }
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e9c1e6acfb6d..b0b87a292e7c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1397,10 +1397,10 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
u32 seq, int flags)
{
const struct devlink_ops *ops = devlink->ops;
+ u8 inline_mode, encap_mode;
void *hdr;
int err = 0;
u16 mode;
- u8 inline_mode;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
if (!hdr)
@@ -1429,6 +1429,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
goto nla_put_failure;
}
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
genlmsg_end(msg, hdr);
return 0;
@@ -1468,9 +1477,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
- u16 mode;
- u8 inline_mode;
+ u8 inline_mode, encap_mode;
int err = 0;
+ u16 mode;
if (!ops)
return -EOPNOTSUPP;
@@ -1494,7 +1503,695 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
return err;
}
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+ struct devlink_dpipe_match *match)
+{
+ struct devlink_dpipe_header *header = match->header;
+ struct devlink_dpipe_field *field = &header->fields[match->field_id];
+ struct nlattr *match_attr;
+
+ match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH);
+ if (!match_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, match_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, match_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *matches_attr;
+
+ matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+ if (!matches_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->matches_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, matches_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, matches_attr);
+ return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+ struct devlink_dpipe_action *action)
+{
+ struct devlink_dpipe_header *header = action->header;
+ struct devlink_dpipe_field *field = &header->fields[action->field_id];
+ struct nlattr *action_attr;
+
+ action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION);
+ if (!action_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, action_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, action_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *actions_attr;
+
+ actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+ if (!actions_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->actions_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, actions_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, actions_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+ struct devlink_dpipe_table *table)
+{
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ table->counters_enabled))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_matches_put(table, skb))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_actions_put(table, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, table_attr);
return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, table_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+ struct genl_info *info)
+{
+ int err;
+
+ if (*pskb) {
+ err = genlmsg_reply(*pskb, info);
+ if (err)
+ return err;
+ }
+ *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*pskb)
+ return -ENOMEM;
+ return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ struct nlattr *tables_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ table = list_first_entry(dpipe_tables,
+ struct devlink_dpipe_table, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES);
+ if (!tables_attr)
+ goto nla_put_failure;
+
+ i = 0;
+ incomplete = false;
+ list_for_each_entry_from(table, dpipe_tables, list) {
+ if (!table_name) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err) {
+ if (!i)
+ goto err_table_put;
+ incomplete = true;
+ break;
+ }
+ } else {
+ if (!strcmp(table->name, table_name)) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err)
+ break;
+ }
+ }
+ i++;
+ }
+
+ nla_nest_end(skb, tables_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name = NULL;
+
+ if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+ return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+ &devlink->dpipe_table_list,
+ table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+ value->value_size, value->value))
+ return -EMSGSIZE;
+ if (value->mask)
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+ value->value_size, value->mask))
+ return -EMSGSIZE;
+ if (value->mapping_valid)
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+ value->mapping_value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->action)
+ return -EINVAL;
+ if (devlink_dpipe_action_put(skb, value->action))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *action_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ action_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+ if (!action_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_action_value_put(skb, &values[i]);
+ if (err)
+ goto err_action_value_put;
+ nla_nest_end(skb, action_attr);
+ }
+ return 0;
+
+err_action_value_put:
+ nla_nest_cancel(skb, action_attr);
+ return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->match)
+ return -EINVAL;
+ if (devlink_dpipe_match_put(skb, value->match))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *match_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ match_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+ if (!match_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_match_value_put(skb, &values[i]);
+ if (err)
+ goto err_match_value_put;
+ nla_nest_end(skb, match_attr);
+ }
+ return 0;
+
+err_match_value_put:
+ nla_nest_cancel(skb, match_attr);
+ return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+ struct devlink_dpipe_entry *entry)
+{
+ struct nlattr *entry_attr, *matches_attr, *actions_attr;
+ int err;
+
+ entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (entry->counter_valid)
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ matches_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+ if (!matches_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_match_values_put(skb, entry->match_values,
+ entry->match_values_count);
+ if (err) {
+ nla_nest_cancel(skb, matches_attr);
+ goto err_match_values_put;
+ }
+ nla_nest_end(skb, matches_attr);
+
+ actions_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+ if (!actions_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_action_values_put(skb, entry->action_values,
+ entry->action_values_count);
+ if (err) {
+ nla_nest_cancel(skb, actions_attr);
+ goto err_action_values_put;
+ }
+ nla_nest_end(skb, actions_attr);
+
+ nla_nest_end(skb, entry_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+ nla_nest_cancel(skb, entry_attr);
+ return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ list_for_each_entry_rcu(table, dpipe_tables, list) {
+ if (!strcmp(table->name, table_name))
+ return table;
+ }
+ return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink *devlink;
+ int err;
+
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+ dump_ctx->info);
+ if (err)
+ return err;
+
+ dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+ dump_ctx->info->snd_portid,
+ dump_ctx->info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI,
+ dump_ctx->cmd);
+ if (!dump_ctx->hdr)
+ goto nla_put_failure;
+
+ devlink = dump_ctx->info->user_ptr[0];
+ if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+ goto nla_put_failure;
+ dump_ctx->nest = nla_nest_start(dump_ctx->skb,
+ DEVLINK_ATTR_DPIPE_ENTRIES);
+ if (!dump_ctx->nest)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
+ nlmsg_free(dump_ctx->skb);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+ struct devlink_dpipe_entry *entry)
+{
+ return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+ genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_table *table)
+{
+ struct devlink_dpipe_dump_ctx dump_ctx;
+ struct nlmsghdr *nlh;
+ int err;
+
+ dump_ctx.skb = NULL;
+ dump_ctx.cmd = cmd;
+ dump_ctx.info = info;
+
+ err = table->table_ops->entries_dump(table->priv,
+ table->counters_enabled,
+ &dump_ctx);
+ if (err)
+ goto err_entries_dump;
+
+send_done:
+ nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(dump_ctx.skb, info);
+
+err_entries_dump:
+err_skb_send_alloc:
+ genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
+ nlmsg_free(dump_ctx.skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ const char *table_name;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (!table->table_ops->entries_dump)
+ return -EINVAL;
+
+ return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ 0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+ const struct devlink_dpipe_header *header)
+{
+ struct devlink_dpipe_field *field;
+ struct nlattr *field_attr;
+ int i;
+
+ for (i = 0; i < header->fields_count; i++) {
+ field = &header->fields[i];
+ field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD);
+ if (!field_attr)
+ return -EMSGSIZE;
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+ goto nla_put_failure;
+ nla_nest_end(skb, field_attr);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, field_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+ struct devlink_dpipe_header *header)
+{
+ struct nlattr *fields_attr, *header_attr;
+ int err;
+
+ header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER);
+ if (!header_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+ if (!fields_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_fields_put(skb, header);
+ if (err) {
+ nla_nest_cancel(skb, fields_attr);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fields_attr);
+ nla_nest_end(skb, header_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+ nla_nest_cancel(skb, header_attr);
+ return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_headers *
+ dpipe_headers)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *headers_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ void *hdr;
+ int i, j;
+ int err;
+
+ i = 0;
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+ if (!headers_attr)
+ goto nla_put_failure;
+
+ j = 0;
+ for (; i < dpipe_headers->headers_count; i++) {
+ err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+ if (err) {
+ if (!j)
+ goto err_table_put;
+ break;
+ }
+ j++;
+ }
+ nla_nest_end(skb, headers_attr);
+ genlmsg_end(skb, hdr);
+ if (i != dpipe_headers->headers_count)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink->dpipe_headers)
+ return -EOPNOTSUPP;
+ return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+ 0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+ const char *table_name,
+ bool enable)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (table->counter_control_extern)
+ return -EOPNOTSUPP;
+
+ if (!(table->counters_enabled ^ enable))
+ return 0;
+
+ table->counters_enabled = enable;
+ if (table->table_ops->counters_set_update)
+ table->table_ops->counters_set_update(table->priv, enable);
+ return 0;
+}
+
+static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name;
+ bool counters_enable;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] ||
+ !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+ return devlink_dpipe_table_counters_set(devlink, table_name,
+ counters_enable);
}
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1512,6 +2209,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -1644,6 +2344,34 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .doit = devlink_nl_cmd_dpipe_table_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .doit = devlink_nl_cmd_dpipe_entries_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .doit = devlink_nl_cmd_dpipe_headers_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .doit = devlink_nl_cmd_dpipe_table_counters_set,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -1680,6 +2408,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
+ INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -1880,6 +2609,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+/**
+ * devlink_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+int devlink_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = dpipe_headers;
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+
+/**
+ * devlink_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devlink_dpipe_headers_unregister(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = NULL;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+
+/**
+ * devlink_dpipe_table_counter_enabled - check if counter allocation
+ * required
+ * @devlink: devlink
+ * @table_name: tables name
+ *
+ * Used by driver to check if counter allocation is required.
+ * After counter allocation is turned on the table entries
+ * are updated to include counter statistics.
+ *
+ * After that point on the driver must respect the counter
+ * state so that each entry added to the table is added
+ * with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+ bool enabled;
+
+ rcu_read_lock();
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ enabled = false;
+ if (table)
+ enabled = table->counters_enabled;
+ rcu_read_unlock();
+ return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ * devlink_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @size: size
+ * @counter_control_extern: external control for counters
+ */
+int devlink_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, u64 size,
+ bool counter_control_extern)
+{
+ struct devlink_dpipe_table *table;
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
+ return -EEXIST;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->name = table_name;
+ table->table_ops = table_ops;
+ table->priv = priv;
+ table->size = size;
+ table->counter_control_extern = counter_control_extern;
+
+ mutex_lock(&devlink_mutex);
+ list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+
+/**
+ * devlink_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devlink_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ mutex_lock(&devlink_mutex);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ goto unlock;
+ list_del_rcu(&table->list);
+ mutex_unlock(&devlink_mutex);
+ kfree_rcu(table, rcu);
+ return;
+unlock:
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index fb55327dcfea..70ccda233bd1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
INIT_WORK(&data->dm_alert_work, send_dm_alert);
- init_timer(&data->send_timer);
- data->send_timer.data = (unsigned long)data;
- data->send_timer.function = sched_send_work;
+ setup_timer(&data->send_timer, sched_send_work,
+ (unsigned long)data);
spin_lock_init(&data->lock);
reset_per_cpu_data(data);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aecb2c7241b6..03111a2d6653 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -90,6 +90,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
+ [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
@@ -103,12 +104,15 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
+ [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
+ [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
};
static const char
rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
[ETH_RSS_HASH_TOP_BIT] = "toeplitz",
[ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
};
static const char
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index b6791d94841d..f21c4d3aeae0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(~0),
};
+bool fib_rule_matchall(const struct fib_rule *rule)
+{
+ if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
+ rule->flags)
+ return false;
+ if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
+ return false;
+ if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
+ !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib_rule_matchall);
+
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
@@ -354,7 +368,8 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
return 0;
}
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -372,7 +387,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
if (err < 0)
goto errout;
@@ -425,6 +440,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (tb[FRA_TUN_ID])
rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+ err = -EINVAL;
if (tb[FRA_L3MDEV]) {
#ifdef CONFIG_NET_L3_MASTER_DEV
rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
@@ -446,7 +462,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
else
rule->suppress_ifgroup = -1;
- err = -EINVAL;
if (tb[FRA_GOTO]) {
if (rule->action != FR_ACT_GOTO)
goto errout_free;
@@ -547,7 +562,8 @@ errout:
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -566,7 +582,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
if (err < 0)
goto errout;
@@ -576,8 +592,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (tb[FRA_UID_RANGE]) {
range = nla_get_kuid_range(tb);
- if (!uid_range_set(&range))
+ if (!uid_range_set(&range)) {
+ err = -EINVAL;
goto errout;
+ }
} else {
range = fib_kuid_range_unset;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index ebaeaf2e46e8..a253a6197e6b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
+#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -52,6 +53,7 @@
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
+#include <net/busy_poll.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -91,7 +93,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ struct sock *save_sk = skb->sk;
+ unsigned int pkt_len;
+
+ skb->sk = sk;
+ pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ skb->sk = save_sk;
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
}
rcu_read_unlock();
@@ -348,7 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* @new_prog: buffer where converted program will be stored
* @new_len: pointer to store length of converted program
*
- * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style.
+ * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
+ * style extended BPF (eBPF).
* Conversion workflow:
*
* 1) First pass for calculating the new program length:
@@ -928,7 +936,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
*/
static void sk_filter_release(struct sk_filter *fp)
{
- if (atomic_dec_and_test(&fp->refcnt))
+ if (refcount_dec_and_test(&fp->refcnt))
call_rcu(&fp->rcu, sk_filter_release_rcu);
}
@@ -943,20 +951,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
/* try to charge the socket memory if there is space available
* return true on success
*/
-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
/* same check as in sock_kmalloc() */
if (filter_size <= sysctl_optmem_max &&
atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
- atomic_inc(&fp->refcnt);
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
return false;
}
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+ bool ret = __sk_filter_charge(sk, fp);
+ if (ret)
+ refcount_inc(&fp->refcnt);
+ return ret;
+}
+
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
struct sock_filter *old_prog;
@@ -1179,12 +1194,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return -ENOMEM;
fp->prog = prog;
- atomic_set(&fp->refcnt, 0);
- if (!sk_filter_charge(sk, fp)) {
+ if (!__sk_filter_charge(sk, fp)) {
kfree(fp);
return -ENOMEM;
}
+ refcount_set(&fp->refcnt, 1);
old_fp = rcu_dereference_protected(sk->sk_filter,
lockdep_sock_is_held(sk));
@@ -2599,6 +2614,36 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+ return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+ .func = bpf_get_socket_cookie,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+ struct sock *sk = sk_to_full_sk(skb->sk);
+ kuid_t kuid;
+
+ if (!sk || !sk_fullsock(sk))
+ return overflowuid;
+ kuid = sock_net_uid(sock_net(sk), sk);
+ return from_kuid_munged(sock_net(sk)->user_ns, kuid);
+}
+
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+ .func = bpf_get_socket_uid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -2633,6 +2678,10 @@ sk_filter_func_proto(enum bpf_func_id func_id)
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -2692,6 +2741,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -2715,12 +2768,7 @@ xdp_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id)
{
- switch (func_id) {
- case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
- default:
- return bpf_base_func_proto(func_id);
- }
+ return sk_filter_func_proto(func_id);
}
static const struct bpf_func_proto *
@@ -3156,6 +3204,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
+
+ case offsetof(struct __sk_buff, napi_id):
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, napi_id));
+ *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#else
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#endif
+ break;
}
return insn - insn_buf;
@@ -3252,111 +3313,55 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-static const struct bpf_verifier_ops sk_filter_ops = {
+const struct bpf_verifier_ops sk_filter_prog_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-static const struct bpf_verifier_ops tc_cls_act_ops = {
+const struct bpf_verifier_ops tc_cls_act_prog_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops xdp_ops = {
+const struct bpf_verifier_ops xdp_prog_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+ .test_run = bpf_prog_test_run_xdp,
};
-static const struct bpf_verifier_ops cg_skb_ops = {
+const struct bpf_verifier_ops cg_skb_prog_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops lwt_inout_ops = {
+const struct bpf_verifier_ops lwt_inout_prog_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops lwt_xmit_ops = {
+const struct bpf_verifier_ops lwt_xmit_prog_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops cg_sock_ops = {
+const struct bpf_verifier_ops cg_sock_prog_ops = {
.get_func_proto = bpf_base_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-static struct bpf_prog_type_list sk_filter_type __ro_after_init = {
- .ops = &sk_filter_ops,
- .type = BPF_PROG_TYPE_SOCKET_FILTER,
-};
-
-static struct bpf_prog_type_list sched_cls_type __ro_after_init = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_CLS,
-};
-
-static struct bpf_prog_type_list sched_act_type __ro_after_init = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_ACT,
-};
-
-static struct bpf_prog_type_list xdp_type __ro_after_init = {
- .ops = &xdp_ops,
- .type = BPF_PROG_TYPE_XDP,
-};
-
-static struct bpf_prog_type_list cg_skb_type __ro_after_init = {
- .ops = &cg_skb_ops,
- .type = BPF_PROG_TYPE_CGROUP_SKB,
-};
-
-static struct bpf_prog_type_list lwt_in_type __ro_after_init = {
- .ops = &lwt_inout_ops,
- .type = BPF_PROG_TYPE_LWT_IN,
-};
-
-static struct bpf_prog_type_list lwt_out_type __ro_after_init = {
- .ops = &lwt_inout_ops,
- .type = BPF_PROG_TYPE_LWT_OUT,
-};
-
-static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = {
- .ops = &lwt_xmit_ops,
- .type = BPF_PROG_TYPE_LWT_XMIT,
-};
-
-static struct bpf_prog_type_list cg_sock_type __ro_after_init = {
- .ops = &cg_sock_ops,
- .type = BPF_PROG_TYPE_CGROUP_SOCK
-};
-
-static int __init register_sk_filter_ops(void)
-{
- bpf_register_prog_type(&sk_filter_type);
- bpf_register_prog_type(&sched_cls_type);
- bpf_register_prog_type(&sched_act_type);
- bpf_register_prog_type(&xdp_type);
- bpf_register_prog_type(&cg_skb_type);
- bpf_register_prog_type(&cg_sock_type);
- bpf_register_prog_type(&lwt_in_type);
- bpf_register_prog_type(&lwt_out_type);
- bpf_register_prog_type(&lwt_xmit_type);
-
- return 0;
-}
-late_initcall(register_sk_filter_ops);
-
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow.c b/net/core/flow.c
index f765c11d8df5..f7f5d1932a27 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -47,7 +47,7 @@ struct flow_flush_info {
static struct kmem_cache *flow_cachep __read_mostly;
-#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
@@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work)
}
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list,
+ unsigned int deleted,
+ struct list_head *gc_list,
struct netns_xfrm *xfrm)
{
if (deleted) {
@@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
static void __flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- int shrink_to)
+ unsigned int shrink_to)
{
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
for (i = 0; i < flow_cache_hash_size(fc); i++) {
- int saved = 0;
+ unsigned int saved = 0;
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
@@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
static void flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp)
{
- int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+ unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
__flow_cache_shrink(fc, fcp, shrink_to);
}
@@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
const struct flowi *key,
- size_t keysize)
+ unsigned int keysize)
{
const u32 *k = (const u32 *) key;
const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
@@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc,
* important assumptions that we can here, such as alignment.
*/
static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
- size_t keysize)
+ unsigned int keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
@@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
- size_t keysize;
+ unsigned int keysize;
unsigned int hash;
local_bh_disable();
@@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
@@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp;
- int i;
+ unsigned int i;
fcp = per_cpu_ptr(fc->percpu, cpu);
for (i = 0; i < flow_cache_hash_size(fc); i++)
@@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net)
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+ unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
if (!fcp->hash_table) {
fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!fcp->hash_table) {
- pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+ pr_err("NET: failed to allocate flow cache sz %u\n", sz);
return -ENOMEM;
}
fcp->hash_rnd_recalc = 1;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d98d4998213d..28d94bce4df8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -113,6 +113,235 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+enum flow_dissect_ret {
+ FLOW_DISSECT_RET_OUT_GOOD,
+ FLOW_DISSECT_RET_OUT_BAD,
+ FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
+};
+
+static enum flow_dissect_ret
+__skb_flow_dissect_mpls(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct mpls_label *hdr, _hdr[2];
+ u32 entry, label;
+
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
+ !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ entry = ntohl(hdr[0].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+ struct flow_dissector_key_mpls *key_mpls;
+
+ key_mpls = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS,
+ target_container);
+ key_mpls->mpls_label = label;
+ key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
+ >> MPLS_LS_TTL_SHIFT;
+ key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
+ >> MPLS_LS_TC_SHIFT;
+ key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
+ >> MPLS_LS_S_SHIFT;
+ }
+
+ if (label == MPLS_LABEL_ENTROPY) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+ target_container);
+ key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+ }
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_arp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_arp *key_arp;
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
+ struct arphdr _arp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen, &_arp_eth);
+ if (!arp_eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_gre(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data,
+ __be16 *p_proto, int *p_nhoff, int *p_hlen,
+ unsigned int flags)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct gre_base_hdr *hdr, _hdr;
+ int offset = 0;
+ u16 gre_ver;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
+ data, *p_hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ *p_proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
+ if (hdr->flags & GRE_CSUM)
+ offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
+ sizeof(((struct gre_full_hdr *) 0)->reserved1);
+
+ if (hdr->flags & GRE_KEY) {
+ const __be32 *keyid;
+ __be32 _keyid;
+
+ keyid = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_keyid),
+ data, *p_hlen, &_keyid);
+ if (!keyid)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID,
+ target_container);
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
+ }
+ offset += sizeof(((struct gre_full_hdr *) 0)->key);
+ }
+
+ if (hdr->flags & GRE_SEQ)
+ offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+
+ if (gre_ver == 0) {
+ if (*p_proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_eth),
+ data, *p_hlen, &_eth);
+ if (!eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+ *p_proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ *p_hlen = *p_nhoff + offset;
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+
+ ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_ppp_hdr),
+ data, *p_hlen, _ppp_hdr);
+ if (!ppp_hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ *p_proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ *p_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
+ }
+
+ *p_nhoff += offset;
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -138,12 +367,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
bool ret;
@@ -181,7 +408,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
}
-again:
+proto_again:
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
@@ -284,7 +511,7 @@ ipv6:
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
if (skip_vlan)
- goto again;
+ goto proto_again;
}
skip_vlan = true;
@@ -307,7 +534,7 @@ ipv6:
}
}
- goto again;
+ goto proto_again;
}
case htons(ETH_P_PPP_SES): {
struct {
@@ -349,31 +576,17 @@ ipv6:
}
case htons(ETH_P_MPLS_UC):
- case htons(ETH_P_MPLS_MC): {
- struct mpls_label *hdr, _hdr[2];
+ case htons(ETH_P_MPLS_MC):
mpls:
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
- hlen, &_hdr);
- if (!hdr)
- goto out_bad;
-
- if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
- MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
- target_container);
- key_keyid->keyid = hdr[1].entry &
- htonl(MPLS_LS_LABEL_MASK);
- }
-
+ switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
}
-
- goto out_good;
- }
-
case htons(ETH_P_FCOE):
if ((hlen - nhoff) < FCOE_HEADER_LEN)
goto out_bad;
@@ -382,177 +595,33 @@ mpls:
goto out_good;
case htons(ETH_P_ARP):
- case htons(ETH_P_RARP): {
- struct {
- unsigned char ar_sha[ETH_ALEN];
- unsigned char ar_sip[4];
- unsigned char ar_tha[ETH_ALEN];
- unsigned char ar_tip[4];
- } *arp_eth, _arp_eth;
- const struct arphdr *arp;
- struct arphdr _arp;
-
- arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
- hlen, &_arp);
- if (!arp)
- goto out_bad;
-
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_hln != ETH_ALEN ||
- arp->ar_pln != 4 ||
- (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST)))
- goto out_bad;
-
- arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
- sizeof(_arp_eth), data,
- hlen,
- &_arp_eth);
- if (!arp_eth)
+ case htons(ETH_P_RARP):
+ switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP)) {
-
- key_arp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP,
- target_container);
-
- memcpy(&key_arp->sip, arp_eth->ar_sip,
- sizeof(key_arp->sip));
- memcpy(&key_arp->tip, arp_eth->ar_tip,
- sizeof(key_arp->tip));
-
- /* Only store the lower byte of the opcode;
- * this covers ARPOP_REPLY and ARPOP_REQUEST.
- */
- key_arp->op = ntohs(arp->ar_op) & 0xff;
-
- ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
- ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
}
-
- goto out_good;
- }
-
default:
goto out_bad;
}
ip_proto_again:
switch (ip_proto) {
- case IPPROTO_GRE: {
- struct gre_base_hdr *hdr, _hdr;
- u16 gre_ver;
- int offset = 0;
-
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
+ case IPPROTO_GRE:
+ switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ target_container, data,
+ &proto, &nhoff, &hlen, flags)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
goto out_bad;
-
- /* Only look inside GRE without routing */
- if (hdr->flags & GRE_ROUTING)
- break;
-
- /* Only look inside GRE for version 0 and 1 */
- gre_ver = ntohs(hdr->flags & GRE_VERSION);
- if (gre_ver > 1)
- break;
-
- proto = hdr->protocol;
- if (gre_ver) {
- /* Version1 must be PPTP, and check the flags */
- if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
- break;
- }
-
- offset += sizeof(struct gre_base_hdr);
-
- if (hdr->flags & GRE_CSUM)
- offset += sizeof(((struct gre_full_hdr *)0)->csum) +
- sizeof(((struct gre_full_hdr *)0)->reserved1);
-
- if (hdr->flags & GRE_KEY) {
- const __be32 *keyid;
- __be32 _keyid;
-
- keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
- data, hlen, &_keyid);
- if (!keyid)
- goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID,
- target_container);
- if (gre_ver == 0)
- key_keyid->keyid = *keyid;
- else
- key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
- }
- offset += sizeof(((struct gre_full_hdr *)0)->key);
- }
-
- if (hdr->flags & GRE_SEQ)
- offset += sizeof(((struct pptp_gre_header *)0)->seq);
-
- if (gre_ver == 0) {
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
- goto out_bad;
- proto = eth->h_proto;
- offset += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = (nhoff + offset);
- }
- } else { /* version 1, must be PPTP */
- u8 _ppp_hdr[PPP_HDRLEN];
- u8 *ppp_hdr;
-
- if (hdr->flags & GRE_ACK)
- offset += sizeof(((struct pptp_gre_header *)0)->ack);
-
- ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_ppp_hdr),
- data, hlen, _ppp_hdr);
- if (!ppp_hdr)
- goto out_bad;
-
- switch (PPP_PROTOCOL(ppp_hdr)) {
- case PPP_IP:
- proto = htons(ETH_P_IP);
- break;
- case PPP_IPV6:
- proto = htons(ETH_P_IPV6);
- break;
- default:
- /* Could probably catch some more like MPLS */
- break;
- }
-
- offset += PPP_HDRLEN;
+ case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
+ goto proto_again;
}
-
- nhoff += offset;
- key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
-
- goto again;
- }
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index c98bbfbd26b8..814e58a3ce8b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct gro_cell *cell;
- if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
+ if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
return netif_rx(skb);
cell = this_cpu_ptr(gcells->cells);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 0cfe7b0216c3..b3bc0a31af9f 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -209,7 +209,8 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
int ret;
u32 fd;
- ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
+ ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -249,7 +250,7 @@ static int bpf_build_state(struct nlattr *nla,
if (family != AF_INET && family != AF_INET6)
return -EAFNOSUPPORT;
- ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
+ ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 6df9f8fabf0c..cfae3d5fe11f 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
struct rtnexthop *rtnh = (struct rtnexthop *)attr;
struct nlattr *nla_entype;
struct nlattr *attrs;
- struct nlattr *nla;
u16 encap_type;
int attrlen;
@@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
attrs = rtnh_attrs(rtnh);
- nla = nla_find(attrs, attrlen, RTA_ENCAP);
nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla_entype) {
@@ -205,7 +203,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
{
const struct lwtunnel_encap_ops *ops;
struct nlattr *nest;
- int ret = -EINVAL;
+ int ret;
if (!lwtstate)
return 0;
@@ -214,8 +212,11 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
lwtstate->type > LWTUNNEL_ENCAP_MAX)
return 0;
- ret = -EOPNOTSUPP;
nest = nla_nest_start(skb, RTA_ENCAP);
+ if (!nest)
+ return -EMSGSIZE;
+
+ ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->fill_encap))
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4526cbd7e28a..58b0bcc125b5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -52,8 +52,9 @@ do { \
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
#ifdef CONFIG_PROC_FS
@@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
if (neigh->parms->neigh_cleanup)
neigh->parms->neigh_cleanup(neigh);
- __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -949,7 +950,7 @@ out:
}
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, 0);
neigh_release(neigh);
}
@@ -1073,7 +1074,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
*/
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags)
+ u32 flags, u32 nlmsg_pid)
{
u8 old;
int err;
@@ -1230,7 +1231,7 @@ out:
write_unlock_bh(&neigh->lock);
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, nlmsg_pid);
return err;
}
@@ -1261,7 +1262,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE,
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE, 0);
return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
@@ -1589,7 +1590,8 @@ static struct neigh_table *neigh_find_table(int family)
return tbl;
}
-static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -1639,14 +1641,16 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
return err;
}
-static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
struct net *net = sock_net(skb->sk);
@@ -1659,7 +1663,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
goto out;
@@ -1730,7 +1734,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
@@ -1933,7 +1938,8 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};
-static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct neigh_table *tbl;
@@ -1943,7 +1949,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
int err, tidx;
err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
- nl_neightbl_policy);
+ nl_neightbl_policy, extack);
if (err < 0)
goto errout;
@@ -1981,7 +1987,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
int i, ifindex = 0;
err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
- nl_ntbl_parm_policy);
+ nl_ntbl_parm_policy, extack);
if (err < 0)
goto errout_tbl_lock;
@@ -2230,10 +2236,10 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -2272,7 +2278,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
unsigned int flags = NLM_F_MULTI;
int err;
- err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
if (!err) {
if (tb[NDA_IFINDEX])
filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
@@ -2831,7 +2837,8 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(4); /* NDA_PROBES */
}
-static void __neigh_notify(struct neighbour *n, int type, int flags)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid)
{
struct net *net = dev_net(n->dev);
struct sk_buff *skb;
@@ -2841,7 +2848,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2857,7 +2864,7 @@ errout:
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 652468ff65b7..1934efd4a9d4 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -35,7 +35,8 @@ LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
struct net init_net = {
- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+ .count = ATOMIC_INIT(1),
+ .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);
@@ -571,7 +572,8 @@ static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_FD] = { .type = NLA_U32 },
};
-static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
@@ -579,7 +581,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
int nsid, err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
- rtnl_net_policy);
+ rtnl_net_policy, extack);
if (err < 0)
return err;
if (!tb[NETNSA_NSID])
@@ -644,7 +646,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
@@ -653,7 +656,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
int err, id;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
- rtnl_net_policy);
+ rtnl_net_policy, extack);
if (err < 0)
return err;
if (tb[NETNSA_PID])
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0f9275ee5595..1c4810919a0a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/module.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c4e84c558240..6e67315ec368 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,15 +896,13 @@ static size_t rtnl_port_size(const struct net_device *dev,
return port_self_size;
}
-static size_t rtnl_xdp_size(const struct net_device *dev)
+static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
- nla_total_size(1); /* XDP_ATTACHED */
+ nla_total_size(1) + /* XDP_ATTACHED */
+ nla_total_size(4); /* XDP_FLAGS */
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
- else
- return xdp_size;
+ return xdp_size;
}
static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
- + rtnl_xdp_size(dev) /* IFLA_XDP */
+ + rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1251,23 +1249,35 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_xdp xdp_op = {};
struct nlattr *xdp;
+ u32 xdp_flags = 0;
+ u8 val = 0;
int err;
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
- xdp_op.command = XDP_QUERY_PROG;
- err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
- if (err)
- goto err_cancel;
- err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+ if (rcu_access_pointer(dev->xdp_prog)) {
+ xdp_flags = XDP_FLAGS_SKB_MODE;
+ val = 1;
+ } else if (dev->netdev_ops->ndo_xdp) {
+ struct netdev_xdp xdp_op = {};
+
+ xdp_op.command = XDP_QUERY_PROG;
+ err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+ if (err)
+ goto err_cancel;
+ val = xdp_op.prog_attached;
+ }
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
if (err)
goto err_cancel;
+ if (xdp_flags) {
+ err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
+ if (err)
+ goto err_cancel;
+ }
nla_nest_end(skb, xdp);
return 0;
@@ -1515,7 +1525,8 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
- if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
+ if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla,
+ ifla_info_policy, NULL) < 0)
return NULL;
if (linfo[IFLA_INFO_KIND]) {
@@ -1592,8 +1603,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
-
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
+ ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1640,9 +1651,10 @@ out:
return skb->len;
}
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
+int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
+ struct netlink_ext_ack *exterr)
{
- return nla_parse(tb, IFLA_MAX, head, len, ifla_policy);
+ return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr);
}
EXPORT_SYMBOL(rtnl_nla_parse_ifla);
@@ -1907,6 +1919,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
#define DO_SETLINK_NOTIFY 0x03
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
+ struct netlink_ext_ack *extack,
struct nlattr **tb, char *ifname, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -2078,7 +2091,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
- ifla_vf_policy);
+ ifla_vf_policy, NULL);
if (err < 0)
goto errout;
err = do_setvfinfo(dev, vfinfo);
@@ -2106,7 +2119,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
- ifla_port_policy);
+ ifla_port_policy, NULL);
if (err < 0)
goto errout;
if (!port[IFLA_PORT_VF]) {
@@ -2126,7 +2139,8 @@ static int do_setlink(const struct sk_buff *skb,
struct nlattr *port[IFLA_PORT_MAX+1];
err = nla_parse_nested(port, IFLA_PORT_MAX,
- tb[IFLA_PORT_SELF], ifla_port_policy);
+ tb[IFLA_PORT_SELF], ifla_port_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -2170,7 +2184,7 @@ static int do_setlink(const struct sk_buff *skb,
u32 xdp_flags = 0;
err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
- ifla_xdp_policy);
+ ifla_xdp_policy, NULL);
if (err < 0)
goto errout;
@@ -2188,7 +2202,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (xdp[IFLA_XDP_FD]) {
- err = dev_change_xdp_fd(dev,
+ err = dev_change_xdp_fd(dev, extack,
nla_get_s32(xdp[IFLA_XDP_FD]),
xdp_flags);
if (err)
@@ -2210,7 +2224,8 @@ errout:
return err;
}
-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2219,7 +2234,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
char ifname[IFNAMSIZ];
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
+ extack);
if (err < 0)
goto errout;
@@ -2246,7 +2262,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- err = do_setlink(skb, dev, ifm, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0);
errout:
return err;
}
@@ -2303,7 +2319,8 @@ int rtnl_delete_link(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);
-static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
@@ -2312,7 +2329,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2407,6 +2424,7 @@ EXPORT_SYMBOL(rtnl_create_link);
static int rtnl_group_changelink(const struct sk_buff *skb,
struct net *net, int group,
struct ifinfomsg *ifm,
+ struct netlink_ext_ack *extack,
struct nlattr **tb)
{
struct net_device *dev, *aux;
@@ -2414,7 +2432,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
- err = do_setlink(skb, dev, ifm, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0);
if (err < 0)
return err;
}
@@ -2423,7 +2441,8 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
@@ -2441,7 +2460,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
#ifdef CONFIG_MODULES
replay:
#endif
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2472,7 +2491,8 @@ replay:
if (tb[IFLA_LINKINFO]) {
err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
- tb[IFLA_LINKINFO], ifla_info_policy);
+ tb[IFLA_LINKINFO], ifla_info_policy,
+ NULL);
if (err < 0)
return err;
} else
@@ -2497,7 +2517,7 @@ replay:
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
err = nla_parse_nested(attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
- ops->policy);
+ ops->policy, NULL);
if (err < 0)
return err;
data = attr;
@@ -2515,7 +2535,8 @@ replay:
err = nla_parse_nested(slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy);
+ m_ops->slave_policy,
+ NULL);
if (err < 0)
return err;
slave_data = slave_attr;
@@ -2557,14 +2578,15 @@ replay:
status |= DO_SETLINK_NOTIFY;
}
- return do_setlink(skb, dev, ifm, tb, ifname, status);
+ return do_setlink(skb, dev, ifm, extack, tb, ifname,
+ status);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
- ifm, tb);
+ ifm, extack, tb);
return -ENODEV;
}
@@ -2673,7 +2695,8 @@ out_unregister:
}
}
-static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2684,7 +2707,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
int err;
u32 ext_filter_mask = 0;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2734,7 +2757,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
@@ -2955,7 +2978,8 @@ static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
return 0;
}
-static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -2965,7 +2989,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
u16 vid;
int err;
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -3055,7 +3079,8 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
-static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -3068,7 +3093,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -3203,8 +3228,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err = 0;
int fidx = 0;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) == 0) {
+ if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, NULL) == 0) {
if (tb[IFLA_MASTER])
br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
@@ -3498,7 +3523,8 @@ errout:
return err;
}
-static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -3572,7 +3598,8 @@ out:
return err;
}
-static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -3940,7 +3967,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
return size;
}
-static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
@@ -4046,7 +4074,8 @@ out:
/* Process one rtnetlink message. */
-static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
rtnl_doit_func doit;
@@ -4101,7 +4130,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (doit == NULL)
return -EOPNOTSUPP;
- return doit(skb, nlh);
+ return doit(skb, nlh, extack);
}
static void rtnetlink_rcv(struct sk_buff *skb)
@@ -4116,22 +4145,16 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_PRE_UP:
- case NETDEV_POST_INIT:
- case NETDEV_REGISTER:
- case NETDEV_CHANGE:
- case NETDEV_PRE_TYPE_CHANGE:
- case NETDEV_GOING_DOWN:
- case NETDEV_UNREGISTER:
- case NETDEV_UNREGISTER_FINAL:
- case NETDEV_RELEASE:
- case NETDEV_JOIN:
- case NETDEV_BONDING_INFO:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_CHANGEINFODATA:
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
default:
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
}
return NOTIFY_DONE;
@@ -4185,6 +4208,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index d28da7d363f1..6bd2f8fb0476 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -64,8 +64,8 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
&ts_secret);
}
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
const struct {
struct in6_addr saddr;
@@ -85,7 +85,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
*tsoff = secure_tcpv6_ts_off(saddr, daddr);
return seq_scale(hash);
}
-EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
@@ -116,14 +116,13 @@ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
&ts_secret);
}
-/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
u64 hash;
net_secret_init();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b782b4593f8c..346d3e85dfbc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3101,7 +3101,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
skb_walk_frags(head_skb, iter) {
if (frag_len != iter->len && iter->next)
goto normal;
- if (skb_headlen(iter))
+ if (skb_headlen(iter) && !iter->head_frag)
goto normal;
len -= iter->len;
diff --git a/net/core/sock.c b/net/core/sock.c
index b416a537bd0a..b5baeb9cb0fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -247,12 +247,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
_sock_locks("k-clock-")
};
+static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
+ "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
+ "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
+ "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
+ "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
+ "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
+ "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
+ "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
+ "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
+ "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
+ "rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
+ "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
+ "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
+ "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
+ "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
+ "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX"
+};
+static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
+ "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
+ "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
+ "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
+ "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
+ "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
+ "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
+ "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
+ "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
+ "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
+ "wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
+ "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
+ "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
+ "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
+ "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
+ "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX"
+};
+static const char *const af_family_elock_key_strings[AF_MAX+1] = {
+ "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
+ "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
+ "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
+ "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
+ "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
+ "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
+ "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
+ "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
+ "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
+ "elock-27" , "elock-28" , "elock-AF_CAN" ,
+ "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
+ "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
+ "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
+ "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
+ "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX"
+};
/*
- * sk_callback_lock locking rules are per-address-family,
+ * sk_callback_lock and sk queues locking rules are per-address-family,
* so split the lock classes by using a per-AF key:
*/
static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_rlock_keys[AF_MAX];
+static struct lock_class_key af_wlock_keys[AF_MAX];
+static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Take into consideration the size of the struct sk_buff overhead in the
@@ -1029,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
union {
int val;
+ u64 val64;
struct linger ling;
struct timeval tm;
} v;
@@ -1259,6 +1314,40 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;
+ case SO_MEMINFO:
+ {
+ u32 meminfo[SK_MEMINFO_VARS];
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ sk_get_meminfo(sk, meminfo);
+
+ len = min_t(unsigned int, len, sizeof(meminfo));
+ if (copy_to_user(optval, &meminfo, len))
+ return -EFAULT;
+
+ goto lenout;
+ }
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_INCOMING_NAPI_ID:
+ v.val = READ_ONCE(sk->sk_napi_id);
+
+ /* aggregate non-NAPI IDs down to 0 */
+ if (v.val < MIN_NAPI_ID)
+ v.val = 0;
+
+ break;
+#endif
+
+ case SO_COOKIE:
+ lv = sizeof(u64);
+ if (len < lv)
+ return -EINVAL;
+ v.val64 = sock_gen_cookie(sk);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1483,6 +1572,27 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
+static void sk_init_common(struct sock *sk)
+{
+ skb_queue_head_init(&sk->sk_receive_queue);
+ skb_queue_head_init(&sk->sk_write_queue);
+ skb_queue_head_init(&sk->sk_error_queue);
+
+ rwlock_init(&sk->sk_callback_lock);
+ lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
+ af_rlock_keys + sk->sk_family,
+ af_family_rlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_write_queue.lock,
+ af_wlock_keys + sk->sk_family,
+ af_family_wlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_error_queue.lock,
+ af_elock_keys + sk->sk_family,
+ af_family_elock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family,
+ af_family_clock_key_strings[sk->sk_family]);
+}
+
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
@@ -1516,13 +1626,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
- skb_queue_head_init(&newsk->sk_receive_queue);
- skb_queue_head_init(&newsk->sk_write_queue);
-
- rwlock_init(&newsk->sk_callback_lock);
- lockdep_set_class_and_name(&newsk->sk_callback_lock,
- af_callback_keys + newsk->sk_family,
- af_family_clock_key_strings[newsk->sk_family]);
+ sk_init_common(newsk);
newsk->sk_dst_cache = NULL;
newsk->sk_dst_pending_confirm = 0;
@@ -1533,7 +1637,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
- skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
@@ -2466,10 +2569,7 @@ EXPORT_SYMBOL(sk_stop_timer);
void sock_init_data(struct socket *sock, struct sock *sk)
{
- skb_queue_head_init(&sk->sk_receive_queue);
- skb_queue_head_init(&sk->sk_write_queue);
- skb_queue_head_init(&sk->sk_error_queue);
-
+ sk_init_common(sk);
sk->sk_send_head = NULL;
init_timer(&sk->sk_timer);
@@ -2521,7 +2621,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp = ktime_set(-1L, 0);
+ sk->sk_stamp = SK_DEFAULT_STAMP;
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
@@ -2802,6 +2902,21 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
+void sk_get_meminfo(const struct sock *sk, u32 *mem)
+{
+ memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
+
+ mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+ mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+ mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+}
+
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
@@ -3142,3 +3257,14 @@ static int __init proto_init(void)
subsys_initcall(proto_init);
#endif /* PROC_FS */
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+bool sk_busy_loop_end(void *p, unsigned long start_time)
+{
+ struct sock *sk = p;
+
+ return !skb_queue_empty(&sk->sk_receive_queue) ||
+ sk_busy_loop_timeout(sk, start_time);
+}
+EXPORT_SYMBOL(sk_busy_loop_end);
+#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 6b10573cc9fa..217f4e3b82f6 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
{
while (1) {
u64 res = atomic64_read(&sk->sk_cookie);
@@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
{
u32 mem[SK_MEMINFO_VARS];
- mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
- mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
- mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+ sk_get_meminfo(sk, mem);
return nla_put(skb, attrtype, sizeof(mem), &mem);
}
@@ -246,7 +238,8 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
-static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
int ret;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9a1a352fd1eb..eed1ebf7f29d 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -13,9 +13,9 @@
static DEFINE_SPINLOCK(reuseport_lock);
-static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- size_t size = sizeof(struct sock_reuseport) +
+ unsigned int size = sizeof(struct sock_reuseport) +
sizeof(struct sock *) * max_socks;
struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7f9cc400eca0..ea23254b2457 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -452,6 +452,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = &one,
.extra2 = &max_skb_frags,
},
+ {
+ .procname = "netdev_budget_usecs",
+ .data = &netdev_budget_usecs,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
{ }
};
diff --git a/net/core/utils.c b/net/core/utils.c
index 32c467cf52d6..93066bd0305a 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(net_ratelimit);
__be32 in_aton(const char *str)
{
- unsigned long l;
+ unsigned int l;
unsigned int val;
int i;