summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c160
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/ethtool.c40
-rw-r--r--net/core/filter.c252
-rw-r--r--net/core/flow_dissector.c57
-rw-r--r--net/core/lwt_bpf.c4
-rw-r--r--net/core/lwtunnel.c4
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/request_sock.c2
-rw-r--r--net/core/rtnetlink.c60
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/secure_seq.c147
-rw-r--r--net/core/skbuff.c20
-rw-r--r--net/core/sock.c15
-rw-r--r--net/core/sysctl_net_core.c31
15 files changed, 432 insertions, 367 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 29101c98399f..0921609dfa81 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,28 +2403,6 @@ void netif_schedule_queue(struct netdev_queue *txq)
}
EXPORT_SYMBOL(netif_schedule_queue);
-/**
- * netif_wake_subqueue - allow sending packets on subqueue
- * @dev: network device
- * @queue_index: sub queue index
- *
- * Resume individual transmit queue of a device with multiple transmit queues.
- */
-void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
-{
- struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
-
- if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
- struct Qdisc *q;
-
- rcu_read_lock();
- q = rcu_dereference(txq->qdisc);
- __netif_schedule(q);
- rcu_read_unlock();
- }
-}
-EXPORT_SYMBOL(netif_wake_subqueue);
-
void netif_tx_wake_queue(struct netdev_queue *dev_queue)
{
if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
@@ -2654,9 +2632,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL;
- else
- return skb->ip_summed == CHECKSUM_NONE;
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_NONE;
+
+ return skb->ip_summed == CHECKSUM_NONE;
}
/**
@@ -2675,11 +2654,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
{
+ struct sk_buff *segs;
+
if (unlikely(skb_needs_check(skb, tx_path))) {
int err;
- skb_warn_bad_offload(skb);
-
+ /* We're going to init ->check field in TCP or UDP header */
err = skb_cow_head(skb, 0);
if (err < 0)
return ERR_PTR(err);
@@ -2707,7 +2687,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- return skb_mac_gso_segment(skb, features);
+ segs = skb_mac_gso_segment(skb, features);
+
+ if (unlikely(skb_needs_check(skb, tx_path)))
+ skb_warn_bad_offload(skb);
+
+ return segs;
}
EXPORT_SYMBOL(__skb_gso_segment);
@@ -3148,9 +3133,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
if (!cl)
return skb;
- /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
- * earlier by the caller.
- */
+ /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -3315,7 +3298,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+ skb->tc_at_ingress = 0;
# ifdef CONFIG_NET_EGRESS
if (static_key_false(&egress_needed)) {
skb = sch_handle_egress(skb, &rc, dev);
@@ -3422,7 +3405,11 @@ EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
-int weight_p __read_mostly = 64; /* old backlog weight */
+int weight_p __read_mostly = 64; /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = 64;
+int dev_tx_weight __read_mostly = 64;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -3911,7 +3898,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+ skb->tc_at_ingress = 1;
qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -3976,9 +3963,7 @@ int netdev_rx_handler_register(struct net_device *dev,
rx_handler_func_t *rx_handler,
void *rx_handler_data)
{
- ASSERT_RTNL();
-
- if (dev->rx_handler)
+ if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
/* Note: rx_handler_data must be set before rx_handler */
@@ -4084,12 +4069,8 @@ another_round:
goto out;
}
-#ifdef CONFIG_NET_CLS_ACT
- if (skb->tc_verd & TC_NCLS) {
- skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- goto ncls;
- }
-#endif
+ if (skb_skip_tc_classify(skb))
+ goto skip_classify;
if (pfmemalloc)
goto skip_taps;
@@ -4117,10 +4098,8 @@ skip_taps:
goto out;
}
#endif
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0;
-ncls:
-#endif
+ skb_reset_tc(skb);
+skip_classify:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
@@ -4616,6 +4595,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
skb_dst_drop(skb);
+ secpath_reset(skb);
kmem_cache_free(skbuff_head_cache, skb);
} else {
__kfree_skb(skb);
@@ -4656,6 +4636,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ secpath_reset(skb);
napi->skb = skb;
}
@@ -4830,7 +4811,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = weight_p;
+ napi->weight = dev_rx_weight;
while (again) {
struct sk_buff *skb;
@@ -4897,23 +4878,6 @@ void __napi_schedule_irqoff(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
-bool __napi_complete(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-
- /* Some drivers call us directly, instead of calling
- * napi_complete_done().
- */
- if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
- return false;
-
- list_del_init(&n->poll_list);
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- return true;
-}
-EXPORT_SYMBOL(__napi_complete);
-
bool napi_complete_done(struct napi_struct *n, int work_done)
{
unsigned long flags;
@@ -4940,14 +4904,13 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
else
napi_gro_flush(n, false);
}
- if (likely(list_empty(&n->poll_list))) {
- WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
- } else {
+ if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
local_irq_save(flags);
- __napi_complete(n);
+ list_del_init(&n->poll_list);
local_irq_restore(flags);
}
+ WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
return true;
}
EXPORT_SYMBOL(napi_complete_done);
@@ -4993,7 +4956,6 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
{
unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
- int (*busy_poll)(struct napi_struct *dev);
void *have_poll_lock = NULL;
struct napi_struct *napi;
int rc;
@@ -5008,17 +4970,10 @@ restart:
if (!napi)
goto out;
- /* Note: ndo_busy_poll method is optional in linux-4.5 */
- busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
-
preempt_disable();
for (;;) {
rc = 0;
local_bh_disable();
- if (busy_poll) {
- rc = busy_poll(napi);
- goto count;
- }
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -6151,50 +6106,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
-int netdev_default_l2upper_neigh_construct(struct net_device *dev,
- struct neighbour *n)
-{
- struct net_device *lower_dev, *stop_dev;
- struct list_head *iter;
- int err;
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- if (!lower_dev->netdev_ops->ndo_neigh_construct)
- continue;
- err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
- if (err) {
- stop_dev = lower_dev;
- goto rollback;
- }
- }
- return 0;
-
-rollback:
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- if (lower_dev == stop_dev)
- break;
- if (!lower_dev->netdev_ops->ndo_neigh_destroy)
- continue;
- lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
-
-void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
- struct neighbour *n)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- if (!lower_dev->netdev_ops->ndo_neigh_destroy)
- continue;
- lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
- }
-}
-EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
-
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -6971,13 +6882,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~dev->gso_partial_features;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
- if (dev->netdev_ops->ndo_busy_poll)
- features |= NETIF_F_BUSY_POLL;
- else
-#endif
- features &= ~NETIF_F_BUSY_POLL;
-
return features;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index b5cbbe07f786..960e503b5a52 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
- dst->pending_confirm = 0;
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d92de0a1f0a4..be7bab1adcde 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -102,7 +102,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXFCS_BIT] = "rx-fcs",
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
- [NETIF_F_BUSY_POLL_BIT] = "busy-poll",
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
};
@@ -1820,11 +1819,13 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
ret = __ethtool_get_sset_count(dev, gstrings.string_set);
if (ret < 0)
return ret;
+ if (ret > S32_MAX / ETH_GSTRING_LEN)
+ return -ENOMEM;
+ WARN_ON_ONCE(!ret);
gstrings.len = ret;
-
- data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
- if (!data)
+ data = vzalloc(gstrings.len * ETH_GSTRING_LEN);
+ if (gstrings.len && !data)
return -ENOMEM;
__ethtool_get_strings(dev, gstrings.string_set, data);
@@ -1833,12 +1834,13 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
goto out;
useraddr += sizeof(gstrings);
- if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+ if (gstrings.len &&
+ copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
goto out;
ret = 0;
out:
- kfree(data);
+ vfree(data);
return ret;
}
@@ -1915,14 +1917,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
if (n_stats < 0)
return n_stats;
- WARN_ON(n_stats == 0);
-
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ WARN_ON_ONCE(!n_stats);
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
stats.n_stats = n_stats;
- data = kmalloc(n_stats * sizeof(u64), GFP_USER);
- if (!data)
+ data = vzalloc(n_stats * sizeof(u64));
+ if (n_stats && !data)
return -ENOMEM;
ops->get_ethtool_stats(dev, &stats, data);
@@ -1931,12 +1934,12 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
goto out;
ret = 0;
out:
- kfree(data);
+ vfree(data);
return ret;
}
@@ -1951,17 +1954,18 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
return -EOPNOTSUPP;
n_stats = phy_get_sset_count(phydev);
-
if (n_stats < 0)
return n_stats;
- WARN_ON(n_stats == 0);
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ WARN_ON_ONCE(!n_stats);
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
stats.n_stats = n_stats;
- data = kmalloc_array(n_stats, sizeof(u64), GFP_USER);
- if (!data)
+ data = vzalloc(n_stats * sizeof(u64));
+ if (n_stats && !data)
return -ENOMEM;
mutex_lock(&phydev->lock);
@@ -1972,12 +1976,12 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
goto out;
ret = 0;
out:
- kfree(data);
+ vfree(data);
return ret;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 1969b3f118c1..0b753cbb2536 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -76,9 +76,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
* allow SOCK_MEMALLOC sockets to use it as this socket is
* helping free memory
*/
- if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
return -ENOMEM;
-
+ }
err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
if (err)
return err;
@@ -1416,8 +1417,8 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_STACK,
- .arg4_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
@@ -1447,8 +1448,8 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_RAW_STACK,
- .arg4_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
};
BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
@@ -1522,10 +1523,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
{
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
+ bool do_mforce = flags & BPF_F_MARK_ENFORCE;
__sum16 *ptr;
- if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
- BPF_F_HDR_FIELD_MASK)))
+ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
+ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
@@ -1533,7 +1535,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
return -EFAULT;
ptr = (__sum16 *)(skb->data + offset);
- if (is_mmzero && !*ptr)
+ if (is_mmzero && !do_mforce && !*ptr)
return 0;
switch (flags & BPF_F_HDR_FIELD_MASK) {
@@ -1601,10 +1603,10 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_STACK,
- .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
- .arg3_type = ARG_PTR_TO_STACK,
- .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE_OR_ZERO,
.arg5_type = ARG_ANYTHING,
};
@@ -2306,8 +2308,8 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -2377,8 +2379,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_RAW_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -2412,8 +2414,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_RAW_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
};
static struct metadata_dst __percpu *md_dst;
@@ -2483,8 +2485,8 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -2509,8 +2511,8 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *
@@ -2593,12 +2595,12 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -2626,6 +2628,17 @@ sk_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
+sk_filter_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -2680,7 +2693,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2695,7 +2708,7 @@ xdp_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2706,7 +2719,7 @@ cg_skb_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2733,7 +2746,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2776,11 +2789,22 @@ static bool __is_valid_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
+
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
+
+ switch (off) {
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+ if (off + size >
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
+ return false;
+ break;
+ default:
+ if (size != sizeof(__u32))
+ return false;
+ }
return true;
}
@@ -2799,7 +2823,7 @@ static bool sk_filter_is_valid_access(int off, int size,
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
break;
default:
return false;
@@ -2823,7 +2847,7 @@ static bool lwt_is_valid_access(int off, int size,
case offsetof(struct __sk_buff, mark):
case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
break;
default:
return false;
@@ -2915,7 +2939,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case offsetof(struct __sk_buff, tc_index):
case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
case offsetof(struct __sk_buff, tc_classid):
break;
default:
@@ -2972,32 +2996,33 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
- struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+static u32 bpf_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
+ int off;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct __sk_buff, len):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, len));
break;
case offsetof(struct __sk_buff, protocol):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, protocol));
break;
case offsetof(struct __sk_buff, vlan_proto):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, vlan_proto));
break;
@@ -3005,17 +3030,17 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, priority));
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, priority));
break;
case offsetof(struct __sk_buff, ingress_ifindex):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, skb_iif));
break;
@@ -3023,17 +3048,17 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct sk_buff, dev));
- *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
case offsetof(struct __sk_buff, hash):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, hash));
break;
@@ -3041,63 +3066,77 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, mark));
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, mark));
break;
case offsetof(struct __sk_buff, pkt_type):
- return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
+ return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
+ si->src_reg, insn);
case offsetof(struct __sk_buff, queue_mapping):
- return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+ return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
+ si->src_reg, insn);
case offsetof(struct __sk_buff, vlan_present):
return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
- dst_reg, src_reg, insn);
+ si->dst_reg, si->src_reg, insn);
case offsetof(struct __sk_buff, vlan_tci):
return convert_skb_access(SKF_AD_VLAN_TAG,
- dst_reg, src_reg, insn);
+ si->dst_reg, si->src_reg, insn);
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+ offsetof(struct qdisc_skb_cb, data)) %
+ sizeof(__u64));
prog->cb_access = 1;
- ctx_off -= offsetof(struct __sk_buff, cb[0]);
- ctx_off += offsetof(struct sk_buff, cb);
- ctx_off += offsetof(struct qdisc_skb_cb, data);
+ off = si->off;
+ off -= offsetof(struct __sk_buff, cb[0]);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
break;
case offsetof(struct __sk_buff, tc_classid):
- ctx_off -= offsetof(struct __sk_buff, tc_classid);
- ctx_off += offsetof(struct sk_buff, cb);
- ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, tc_classid);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, tc_classid);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
+ si->src_reg, off);
else
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
+ si->src_reg, off);
break;
case offsetof(struct __sk_buff, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct sk_buff, data));
break;
case offsetof(struct __sk_buff, data_end):
- ctx_off -= offsetof(struct __sk_buff, data_end);
- ctx_off += offsetof(struct sk_buff, cb);
- ctx_off += offsetof(struct bpf_skb_data_end, data_end);
- *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg,
- ctx_off);
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_end);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct bpf_skb_data_end, data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
break;
case offsetof(struct __sk_buff, tc_index):
@@ -3105,110 +3144,107 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, tc_index));
else
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, tc_index));
- break;
#else
if (type == BPF_WRITE)
- *insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+ *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
else
- *insn++ = BPF_MOV64_IMM(dst_reg, 0);
- break;
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
+ break;
}
return insn - insn_buf;
}
static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
- int dst_reg, int src_reg,
- int ctx_off,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
break;
case offsetof(struct bpf_sock, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_family));
break;
case offsetof(struct bpf_sock, type):
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
break;
case offsetof(struct bpf_sock, protocol):
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break;
}
return insn - insn_buf;
}
-static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
+static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct __sk_buff, ifindex):
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct sk_buff, dev));
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
default:
- return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
- ctx_off, insn_buf, prog);
+ return bpf_convert_ctx_access(type, si, insn_buf, prog);
}
return insn - insn_buf;
}
-static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
+static u32 xdp_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct xdp_md, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data));
break;
case offsetof(struct xdp_md, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data_end));
break;
}
@@ -3219,7 +3255,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
@@ -3238,24 +3274,24 @@ static const struct bpf_verifier_ops xdp_ops = {
static const struct bpf_verifier_ops cg_skb_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
};
static const struct bpf_verifier_ops lwt_inout_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
};
static const struct bpf_verifier_ops lwt_xmit_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
};
static const struct bpf_verifier_ops cg_sock_ops = {
- .get_func_proto = sk_filter_func_proto,
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1b7673aac59d..c35aae13c8d2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -138,6 +138,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
@@ -379,6 +380,62 @@ mpls:
nhoff += FCOE_HEADER_LEN;
goto out_good;
+
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP): {
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
+ struct arphdr *_arp;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ goto out_bad;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ goto out_bad;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen,
+ &_arp_eth);
+ if (!arp_eth)
+ goto out_bad;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP)) {
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip,
+ sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip,
+ sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+ }
+
+ goto out_good;
+ }
+
default:
goto out_bad;
}
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index b3eef90b2df9..0cfe7b0216c3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -237,7 +237,7 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
[LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 },
};
-static int bpf_build_state(struct net_device *dev, struct nlattr *nla,
+static int bpf_build_state(struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts)
{
@@ -352,7 +352,7 @@ static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
0;
}
-int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
+static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
{
/* FIXME:
* The LWT state is currently rebuilt for delete requests which
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index c23465005f2f..6df9f8fabf0c 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -101,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
}
EXPORT_SYMBOL(lwtunnel_encap_del_ops);
-int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+int lwtunnel_build_state(u16 encap_type,
struct nlattr *encap, unsigned int family,
const void *cfg, struct lwtunnel_state **lws)
{
@@ -116,7 +116,7 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
- ret = ops->build_state(dev, encap, family, cfg, lws);
+ ret = ops->build_state(encap, family, cfg, lws);
if (ret)
module_put(ops->owner);
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8e69ce472236..96947f5d41e4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3439,9 +3439,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* skb was 'freed' by stack, so clean few
* bits and reuse it
*/
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0; /* reset reclass/redir ttl */
-#endif
+ skb_reset_tc(skb);
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 5d26056b6d8f..9b8727c67b58 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -34,8 +34,6 @@
* and it will increase in proportion to the memory of machine.
* Note : Dont forget somaxconn that may limit backlog too.
*/
-int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
void reqsk_queue_alloc(struct request_sock_queue *queue)
{
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 75e3ea7bda08..adfb54b896da 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -837,8 +837,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
static inline int rtnl_vfinfo_size(const struct net_device *dev,
u32 ext_filter_mask)
{
- if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
- (ext_filter_mask & RTEXT_FILTER_VF)) {
+ if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) {
int num_vfs = dev_num_vf(dev->dev.parent);
size_t size = nla_total_size(0);
size += num_vfs *
@@ -2571,7 +2570,7 @@ replay:
return -ENODEV;
}
- if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
+ if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
return -EOPNOTSUPP;
if (!ops) {
@@ -2653,6 +2652,11 @@ replay:
if (err < 0)
goto out_unregister;
}
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ if (err)
+ goto out_unregister;
+ }
out:
if (link_net)
put_net(link_net);
@@ -3829,6 +3833,39 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = 0;
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) {
+ struct rtnl_af_ops *af_ops;
+
+ *idxattr = IFLA_STATS_AF_SPEC;
+ attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC);
+ if (!attr)
+ goto nla_put_failure;
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->fill_stats_af) {
+ struct nlattr *af;
+ int err;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ goto nla_put_failure;
+
+ err = af_ops->fill_stats_af(skb, dev);
+
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+ }
+
+ nla_nest_end(skb, attr);
+
+ *idxattr = 0;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -3885,6 +3922,23 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
size += rtnl_get_offload_stats_size(dev);
+ if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) {
+ struct rtnl_af_ops *af_ops;
+
+ /* for IFLA_STATS_AF_SPEC */
+ size += nla_total_size(0);
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->get_stats_af_size) {
+ size += nla_total_size(
+ af_ops->get_stats_af_size(dev));
+
+ /* for AF_* */
+ size += nla_total_size(0);
+ }
+ }
+ }
+
return size;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index d8820438ba37..b6d83686e149 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -71,7 +71,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
struct file **fpp;
int i, num;
- num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+ num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int);
if (num <= 0)
return 0;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 88a8e429fc3e..758f140b6bed 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -1,3 +1,7 @@
+/*
+ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cryptohash.h>
@@ -8,18 +12,18 @@
#include <linux/ktime.h>
#include <linux/string.h>
#include <linux/net.h>
-
+#include <linux/siphash.h>
#include <net/secure_seq.h>
#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#include <linux/in6.h>
#include <net/tcp.h>
-#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+static siphash_key_t net_secret __read_mostly;
static __always_inline void net_secret_init(void)
{
- net_get_random_once(net_secret, sizeof(net_secret));
+ net_get_random_once(&net_secret, sizeof(net_secret));
}
#endif
@@ -44,80 +48,70 @@ static u32 seq_scale(u32 seq)
u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport, u32 *tsoff)
{
- u32 secret[MD5_MESSAGE_BYTES / 4];
- u32 hash[MD5_DIGEST_WORDS];
- u32 i;
-
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 sport;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ .sport = sport,
+ .dport = dport
+ };
+ u64 hash;
net_secret_init();
- memcpy(hash, saddr, 16);
- for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + (__force u32)daddr[i];
- secret[4] = net_secret[4] +
- (((__force u16)sport << 16) + (__force u16)dport);
- for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
- secret[i] = net_secret[i];
-
- md5_transform(hash, secret);
-
- *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
- return seq_scale(hash[0]);
+ hash = siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
+ *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ return seq_scale(hash);
}
EXPORT_SYMBOL(secure_tcpv6_sequence_number);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
- u32 secret[MD5_MESSAGE_BYTES / 4];
- u32 hash[MD5_DIGEST_WORDS];
- u32 i;
-
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ .dport = dport
+ };
net_secret_init();
- memcpy(hash, saddr, 16);
- for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + (__force u32) daddr[i];
- secret[4] = net_secret[4] + (__force u32)dport;
- for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
- secret[i] = net_secret[i];
-
- md5_transform(hash, secret);
-
- return hash[0];
+ return siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
}
EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
+/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+ * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
+ * it would be easy enough to have the former function use siphash_4u32, passing
+ * the arguments as separate u32.
+ */
+
u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport, u32 *tsoff)
{
- u32 hash[MD5_DIGEST_WORDS];
-
+ u64 hash;
net_secret_init();
- hash[0] = (__force u32)saddr;
- hash[1] = (__force u32)daddr;
- hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
- return seq_scale(hash[0]);
+ hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ (__force u32)sport << 16 | (__force u32)dport,
+ &net_secret);
+ *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ return seq_scale(hash);
}
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
- u32 hash[MD5_DIGEST_WORDS];
-
net_secret_init();
- hash[0] = (__force u32)saddr;
- hash[1] = (__force u32)daddr;
- hash[2] = (__force u32)dport ^ net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
+ return siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport, &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
@@ -126,21 +120,13 @@ EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
{
- u32 hash[MD5_DIGEST_WORDS];
u64 seq;
-
net_secret_init();
- hash[0] = (__force u32)saddr;
- hash[1] = (__force u32)daddr;
- hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- seq = hash[0] | (((u64)hash[1]) << 32);
+ seq = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ (__force u32)sport << 16 | (__force u32)dport,
+ &net_secret);
seq += ktime_get_real_ns();
seq &= (1ull << 48) - 1;
-
return seq;
}
EXPORT_SYMBOL(secure_dccp_sequence_number);
@@ -149,26 +135,23 @@ EXPORT_SYMBOL(secure_dccp_sequence_number);
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
__be16 sport, __be16 dport)
{
- u32 secret[MD5_MESSAGE_BYTES / 4];
- u32 hash[MD5_DIGEST_WORDS];
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 sport;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ .sport = sport,
+ .dport = dport
+ };
u64 seq;
- u32 i;
-
net_secret_init();
- memcpy(hash, saddr, 16);
- for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + (__force u32)daddr[i];
- secret[4] = net_secret[4] +
- (((__force u16)sport << 16) + (__force u16)dport);
- for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
- secret[i] = net_secret[i];
-
- md5_transform(hash, secret);
-
- seq = hash[0] | (((u64)hash[1]) << 32);
+ seq = siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
seq += ktime_get_real_ns();
seq &= (1ull << 48) - 1;
-
return seq;
}
EXPORT_SYMBOL(secure_dccpv6_sequence_number);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 734c71468b01..f3557958e9bf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -271,7 +271,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
- fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -655,7 +654,7 @@ static void skb_release_head_state(struct sk_buff *skb)
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- nf_conntrack_put(skb->nfct);
+ nf_conntrack_put(skb_nfct(skb));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
@@ -878,9 +877,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
#ifdef CONFIG_NET_SCHED
CHECK_SKB_FIELD(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
- CHECK_SKB_FIELD(tc_verd);
-#endif
#endif
}
@@ -1195,10 +1191,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone);
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
gfp_t gfp_mask)
{
- int i;
- u8 *data;
- int size = nhead + skb_end_offset(skb) + ntail;
+ int i, osize = skb_end_offset(skb);
+ int size = osize + nhead + ntail;
long off;
+ u8 *data;
BUG_ON(nhead < 0);
@@ -1260,6 +1256,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->hdr_len = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+
+ /* It is not generally safe to change skb->truesize.
+ * For the moment, we really care of rx path, or
+ * when skb is orphaned (not attached to a socket).
+ */
+ if (!skb->sk || skb->destructor == sock_edemux)
+ skb->truesize += size - osize;
+
return 0;
nofrags:
diff --git a/net/core/sock.c b/net/core/sock.c
index 4eca27dc5c94..b74356535559 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
- "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX"
+ "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
- "slock-AF_QIPCRTR", "slock-AF_MAX"
+ "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
- "clock-AF_QIPCRTR", "clock-AF_MAX"
+ "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX"
};
/*
@@ -502,6 +502,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
+ sk->sk_dst_pending_confirm = 0;
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
@@ -762,11 +763,8 @@ set_rcvbuf:
goto set_rcvbuf;
case SO_KEEPALIVE:
-#ifdef CONFIG_INET
- if (sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM)
- tcp_set_keepalive(sk, valbool);
-#endif
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, valbool);
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
break;
@@ -1522,6 +1520,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
af_family_clock_key_strings[newsk->sk_family]);
newsk->sk_dst_cache = NULL;
+ newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
atomic_set(&newsk->sk_drops, 0);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e4009f62..eaa72eb0399c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
}
#endif
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret != 0)
+ return ret;
+
+ dev_rx_weight = weight_p * dev_weight_rx_bias;
+ dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+ return ret;
+}
+
static int proc_do_rss_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
.data = &weight_p,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_do_dev_weight,
+ },
+ {
+ .procname = "dev_weight_rx_bias",
+ .data = &dev_weight_rx_bias,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_dev_weight,
+ },
+ {
+ .procname = "dev_weight_tx_bias",
+ .data = &dev_weight_tx_bias,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_dev_weight,
},
{
.procname = "netdev_max_backlog",