summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c131
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/filter.c139
-rw-r--r--net/core/flow.c4
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/net_namespace.c23
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/pktgen.c47
-rw-r--r--net/core/rtnetlink.c226
-rw-r--r--net/core/scm.c6
-rw-r--r--net/core/skbuff.c24
-rw-r--r--net/core/sock.c24
-rw-r--r--net/core/sysctl_net_core.c5
14 files changed, 558 insertions, 96 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index c0946cb2b354..7304ea8a1f13 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly; /* Taps */
+static struct list_head offload_base __read_mostly;
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt)
}
EXPORT_SYMBOL(dev_remove_pack);
+
+/**
+ * dev_add_offload - register offload handlers
+ * @po: protocol offload declaration
+ *
+ * Add protocol offload handlers to the networking stack. The passed
+ * &proto_offload is linked into kernel lists and may not be freed until
+ * it has been removed from the kernel lists.
+ *
+ * This call does not sleep therefore it can not
+ * guarantee all CPU's that are in middle of receiving packets
+ * will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+
+ spin_lock(&offload_lock);
+ list_add_rcu(&po->list, head);
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ * __dev_remove_offload - remove offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a protocol offload handler that was previously added to the
+ * kernel offload handlers by dev_add_offload(). The passed &offload_type
+ * is removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * The packet type might still be in use by receivers
+ * and must not be freed until after all the CPU's have gone
+ * through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+ struct packet_offload *po1;
+
+ spin_lock(&offload_lock);
+
+ list_for_each_entry(po1, head, list) {
+ if (po == po1) {
+ list_del_rcu(&po->list);
+ goto out;
+ }
+ }
+
+ pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ * dev_remove_offload - remove packet offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a packet offload handler that was previously added to the kernel
+ * offload handlers by dev_add_offload(). The passed &offload_type is
+ * removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * This call sleeps to guarantee that no CPU is looking at the packet
+ * type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+ __dev_remove_offload(po);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
/******************************************************************************
Device Boot-time Settings Routines
@@ -1075,10 +1153,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return -EINVAL;
if (!len) {
- if (dev->ifalias) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- }
+ kfree(dev->ifalias);
+ dev->ifalias = NULL;
return 0;
}
@@ -1994,7 +2070,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
int vlan_depth = ETH_HLEN;
int err;
@@ -2023,18 +2099,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
}
rcu_read_lock();
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- err = ptype->gso_send_check(skb);
+ err = ptype->callbacks.gso_send_check(skb);
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
break;
__skb_push(skb, (skb->data -
skb_network_header(skb)));
}
- segs = ptype->gso_segment(skb, features);
+ segs = ptype->callbacks.gso_segment(skb, features);
break;
}
}
@@ -3446,9 +3521,9 @@ static void flush_backlog(void *arg)
static int napi_gro_complete(struct sk_buff *skb)
{
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int err = -ENOENT;
if (NAPI_GRO_CB(skb)->count == 1) {
@@ -3458,10 +3533,10 @@ static int napi_gro_complete(struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->gro_complete(skb);
+ err = ptype->callbacks.gro_complete(skb);
break;
}
rcu_read_unlock();
@@ -3508,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush);
enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int same_flow;
int mac_len;
enum gro_result ret;
@@ -3523,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ if (ptype->type != type || !ptype->callbacks.gro_receive)
continue;
skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3533,7 +3608,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
- pp = ptype->gro_receive(&napi->gro_list, skb);
+ pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
}
rcu_read_unlock();
@@ -5202,7 +5277,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSIFNAME:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
dev_load(net, ifr.ifr_name);
rtnl_lock();
@@ -5223,16 +5298,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
* - require strict serialization.
* - do not return a value
*/
+ case SIOCSIFMAP:
+ case SIOCSIFTXQLEN:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ /* fall through */
+ /*
+ * These ioctl calls:
+ * - require local superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
case SIOCSIFFLAGS:
case SIOCSIFMETRIC:
case SIOCSIFMTU:
- case SIOCSIFMAP:
case SIOCSIFHWADDR:
case SIOCSIFSLAVE:
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
- case SIOCSIFTXQLEN:
case SIOCSMIIREG:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
@@ -5241,7 +5325,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCBRADDIF:
case SIOCBRDELIF:
case SIOCSHWTSTAMP:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
@@ -6266,7 +6350,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
goto out;
/* Ensure the device has been registrered */
- err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
goto out;
@@ -6664,6 +6747,8 @@ static int __init net_dev_init(void)
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
+ INIT_LIST_HEAD(&offload_base);
+
if (register_pernet_subsys(&netdev_net_ops))
goto out;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4d64cc2e3fa9..a8705432e4b1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GEEE:
break;
default:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d92ebb7fbcf..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,6 +39,7 @@
#include <linux/reciprocal_div.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
+#include <linux/if_vlan.h>
/* No hurry in this branch
*
@@ -341,6 +342,12 @@ load_b:
case BPF_S_ANC_CPU:
A = raw_smp_processor_id();
continue;
+ case BPF_S_ANC_VLAN_TAG:
+ A = vlan_tx_tag_get(skb);
+ continue;
+ case BPF_S_ANC_VLAN_TAG_PRESENT:
+ A = !!vlan_tx_tag_present(skb);
+ continue;
case BPF_S_ANC_NLATTR: {
struct nlattr *nla;
@@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
ANCILLARY(RXHASH);
ANCILLARY(CPU);
ANCILLARY(ALU_XOR_X);
+ ANCILLARY(VLAN_TAG);
+ ANCILLARY(VLAN_TAG_PRESENT);
}
}
ftest->code = code;
@@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
+
+static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+{
+ static const u16 decodes[] = {
+ [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
+ [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
+ [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
+ [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
+ [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
+ [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
+ [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
+ [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
+ [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
+ [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
+ [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
+ [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
+ [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
+ [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
+ [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
+ [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
+ [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
+ [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
+ [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
+ [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
+ [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
+ [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
+ [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
+ [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
+ [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
+ [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
+ [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
+ [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
+ [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
+ [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
+ [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
+ [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
+ [BPF_S_RET_K] = BPF_RET|BPF_K,
+ [BPF_S_RET_A] = BPF_RET|BPF_A,
+ [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
+ [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
+ [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
+ [BPF_S_ST] = BPF_ST,
+ [BPF_S_STX] = BPF_STX,
+ [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
+ [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
+ [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
+ [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
+ [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
+ [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
+ [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
+ [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
+ [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
+ };
+ u16 code;
+
+ code = filt->code;
+
+ to->code = decodes[code];
+ to->jt = filt->jt;
+ to->jf = filt->jf;
+
+ if (code == BPF_S_ALU_DIV_K) {
+ /*
+ * When loaded this rule user gave us X, which was
+ * translated into R = r(X). Now we calculate the
+ * RR = r(R) and report it back. If next time this
+ * value is loaded and RRR = r(RR) is calculated
+ * then the R == RRR will be true.
+ *
+ * One exception. X == 1 translates into R == 0 and
+ * we can't calculate RR out of it with r().
+ */
+
+ if (filt->k == 0)
+ to->k = 1;
+ else
+ to->k = reciprocal_value(filt->k);
+
+ BUG_ON(reciprocal_value(to->k) != filt->k);
+ } else
+ to->k = filt->k;
+}
+
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+{
+ struct sk_filter *filter;
+ int i, ret;
+
+ lock_sock(sk);
+ filter = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
+ ret = 0;
+ if (!filter)
+ goto out;
+ ret = filter->len;
+ if (!len)
+ goto out;
+ ret = -EINVAL;
+ if (len < filter->len)
+ goto out;
+
+ ret = -EFAULT;
+ for (i = 0; i < filter->len; i++) {
+ struct sock_filter fb;
+
+ sk_decode_filter(&filter->insns[i], &fb);
+ if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
+ goto out;
+ }
+
+ ret = filter->len;
+out:
+ release_sock(sk);
+ return ret;
+}
diff --git a/net/core/flow.c b/net/core/flow.c
index e318c7e98042..b0901ee5a002 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data)
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
- int cpu;
struct tasklet_struct *tasklet;
- cpu = smp_processor_id();
- tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+ tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 22571488730a..f1c0c2e9cad5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2987,6 +2987,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
}
+ /* Don't export sysctls to unprivileged users */
+ if (neigh_parms_net(p)->user_ns != &init_user_ns)
+ t->neigh_vars[0].procname = NULL;
+
snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
p_name, dev_name_source);
t->sysctl_header =
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 017a8bacfb27..334efd5d67a9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -18,11 +18,9 @@
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
-#include <linux/wireless.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
-#include <net/wext.h>
#include "net-sysfs.h"
@@ -73,11 +71,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len,
int (*set)(struct net_device *, unsigned long))
{
- struct net_device *net = to_net_dev(dev);
+ struct net_device *netdev = to_net_dev(dev);
+ struct net *net = dev_net(netdev);
unsigned long new;
int ret = -EINVAL;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
ret = kstrtoul(buf, 0, &new);
@@ -87,8 +86,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (!rtnl_trylock())
return restart_syscall();
- if (dev_isalive(net)) {
- if ((ret = (*set)(net, new)) == 0)
+ if (dev_isalive(netdev)) {
+ if ((ret = (*set)(netdev, new)) == 0)
ret = len;
}
rtnl_unlock();
@@ -264,6 +263,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
@@ -271,10 +273,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
struct net_device *netdev = to_net_dev(dev);
+ struct net *net = dev_net(netdev);
size_t count = len;
ssize_t ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* ignore trailing newline */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42f1e1c7514f..6456439cbbd9 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -13,6 +13,7 @@
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/export.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops,
/*
* setup_net runs the initializers for the network namespace object.
*/
-static __net_init int setup_net(struct net *net)
+static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with net_mutex held */
const struct pernet_operations *ops, *saved_ops;
@@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net)
atomic_set(&net->count, 1);
atomic_set(&net->passive, 1);
net->dev_base_seq = 1;
+ net->user_ns = user_ns;
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
@@ -232,7 +234,8 @@ void net_drop_ns(void *p)
net_free(ns);
}
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+struct net *copy_net_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct net *old_net)
{
struct net *net;
int rv;
@@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
net = net_alloc();
if (!net)
return ERR_PTR(-ENOMEM);
+
+ get_user_ns(user_ns);
+
mutex_lock(&net_mutex);
- rv = setup_net(net);
+ rv = setup_net(net, user_ns);
if (rv == 0) {
rtnl_lock();
list_add_tail_rcu(&net->list, &net_namespace_list);
@@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
}
mutex_unlock(&net_mutex);
if (rv < 0) {
+ put_user_ns(user_ns);
net_drop_ns(net);
return ERR_PTR(rv);
}
@@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
+ put_user_ns(net->user_ns);
net_drop_ns(net);
}
}
@@ -347,13 +355,6 @@ struct net *get_net_ns_by_fd(int fd)
}
#else
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
-{
- if (flags & CLONE_NEWNET)
- return ERR_PTR(-EINVAL);
- return old_net;
-}
-
struct net *get_net_ns_by_fd(int fd)
{
return ERR_PTR(-EINVAL);
@@ -402,7 +403,7 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
mutex_lock(&net_mutex);
- if (setup_net(&init_net))
+ if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
rtnl_lock();
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..847c02b197b0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
-void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d1dc14c2aac4..b29dacf900f9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -419,20 +419,6 @@ struct pktgen_thread {
#define REMOVE 1
#define FIND 0
-static inline ktime_t ktime_now(void)
-{
- struct timespec ts;
- ktime_get_ts(&ts);
-
- return timespec_to_ktime(ts);
-}
-
-/* This works even if 32 bit because of careful byte order choice */
-static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
-{
- return cmp1.tv64 < cmp2.tv64;
-}
-
static const char version[] =
"Packet Generator for packet performance testing. "
"Version: " VERSION "\n";
@@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, "\n");
/* not really stopped, more like last-running-at */
- stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at;
+ stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at;
idle = pkt_dev->idle_acc;
do_div(idle, NSEC_PER_USEC);
@@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
return;
}
- start_time = ktime_now();
+ start_time = ktime_get();
if (remaining < 100000) {
/* for small delays (<100us), just loop until limit is reached */
do {
- end_time = ktime_now();
- } while (ktime_lt(end_time, spin_until));
+ end_time = ktime_get();
+ } while (ktime_compare(end_time, spin_until) < 0);
} else {
/* see do_nanosleep */
hrtimer_init_sleeper(&t, current);
@@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
hrtimer_cancel(&t.timer);
} while (t.task && pkt_dev->running && !signal_pending(current));
__set_current_state(TASK_RUNNING);
- end_time = ktime_now();
+ end_time = ktime_get();
}
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
@@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
}
} else { /* IPV6 * */
- if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 &&
- pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
- pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
- pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ;
- else {
+ if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) {
int i;
/* Only random destinations yet */
@@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t)
pktgen_clear_counters(pkt_dev);
pkt_dev->running = 1; /* Cranke yeself! */
pkt_dev->skb = NULL;
- pkt_dev->started_at =
- pkt_dev->next_tx = ktime_now();
+ pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
set_pkt_overhead(pkt_dev);
@@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
- pkt_dev->stopped_at = ktime_now();
+ pkt_dev->stopped_at = ktime_get();
pkt_dev->running = 0;
show_results(pkt_dev, nr_frags);
@@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
continue;
if (best == NULL)
best = pkt_dev;
- else if (ktime_lt(pkt_dev->next_tx, best->next_tx))
+ else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
best = pkt_dev;
}
if_unlock(t);
@@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
static void pktgen_resched(struct pktgen_dev *pkt_dev)
{
- ktime_t idle_start = ktime_now();
+ ktime_t idle_start = ktime_get();
schedule();
- pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
}
static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
{
- ktime_t idle_start = ktime_now();
+ ktime_t idle_start = ktime_get();
while (atomic_read(&(pkt_dev->skb->users)) != 1) {
if (signal_pending(current))
@@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
else
cpu_relax();
}
- pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start));
+ pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
}
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
@@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
* "never transmit"
*/
if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
- pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX);
+ pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX);
return;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fad649ae4dec..575a6ee89944 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
if (tab == NULL || tab[msgindex].doit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
- return tab ? tab[msgindex].doit : NULL;
+ return tab[msgindex].doit;
}
static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
@@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
if (tab == NULL || tab[msgindex].dumpit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
- return tab ? tab[msgindex].dumpit : NULL;
+ return tab[msgindex].dumpit;
}
static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
@@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
if (tab == NULL || tab[msgindex].calcit == NULL)
tab = rtnl_msg_handlers[PF_UNSPEC];
- return tab ? tab[msgindex].calcit : NULL;
+ return tab[msgindex].calcit;
}
/**
@@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
err = PTR_ERR(net);
goto errout;
}
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto errout;
+ }
err = dev_change_net_namespace(dev, net, ifname);
put_net(net);
if (err)
@@ -2057,6 +2061,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
u8 *addr;
int err;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
if (err < 0)
return err;
@@ -2123,6 +2130,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
int err = -EINVAL;
__u8 *addr;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlmsg_len(nlh) < sizeof(*ndm))
return -EINVAL;
@@ -2253,6 +2263,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u16 mode)
+{
+ struct nlmsghdr *nlh;
+ struct ifinfomsg *ifm;
+ struct nlattr *br_afspec;
+ u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+
+ nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ ifm = nlmsg_data(nlh);
+ ifm->ifi_family = AF_BRIDGE;
+ ifm->__ifi_pad = 0;
+ ifm->ifi_type = dev->type;
+ ifm->ifi_index = dev->ifindex;
+ ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_change = 0;
+
+
+ if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+ nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+ nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
+ (dev->master &&
+ nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
+ (dev->addr_len &&
+ nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
+ (dev->ifindex != dev->iflink &&
+ nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+ goto nla_put_failure;
+
+ br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!br_afspec)
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) ||
+ nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
+ nla_nest_cancel(skb, br_afspec);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, br_afspec);
+
+ return nlmsg_end(skb, nlh);
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
+
+static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int idx = 0;
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ u32 seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct net_device *master = dev->master;
+
+ if (master && master->netdev_ops->ndo_bridge_getlink) {
+ if (idx >= cb->args[0] &&
+ master->netdev_ops->ndo_bridge_getlink(
+ skb, portid, seq, dev) < 0)
+ break;
+ idx++;
+ }
+
+ if (ops->ndo_bridge_getlink) {
+ if (idx >= cb->args[0] &&
+ ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
+ break;
+ idx++;
+ }
+ }
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+static inline size_t bridge_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ + nla_total_size(sizeof(u32)) /* IFLA_MASTER */
+ + nla_total_size(sizeof(u32)) /* IFLA_MTU */
+ + nla_total_size(sizeof(u32)) /* IFLA_LINK */
+ + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */
+ + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */
+ + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */
+ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
+}
+
+static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
+{
+ struct net *net = dev_net(dev);
+ struct net_device *master = dev->master;
+ struct sk_buff *skb;
+ int err = -EOPNOTSUPP;
+
+ skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
+ if (!skb) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
+ master && master->netdev_ops->ndo_bridge_getlink) {
+ err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+ if (err < 0)
+ goto errout;
+ }
+
+ if ((flags & BRIDGE_FLAGS_SELF) &&
+ dev->netdev_ops->ndo_bridge_getlink) {
+ err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+ if (err < 0)
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+ return 0;
+errout:
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ return err;
+}
+
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct ifinfomsg *ifm;
+ struct net_device *dev;
+ struct nlattr *br_spec, *attr = NULL;
+ int rem, err = -EOPNOTSUPP;
+ u16 oflags, flags = 0;
+ bool have_flags = false;
+
+ if (nlmsg_len(nlh) < sizeof(*ifm))
+ return -EINVAL;
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifi_family != AF_BRIDGE)
+ return -EPFNOSUPPORT;
+
+ dev = __dev_get_by_index(net, ifm->ifi_index);
+ if (!dev) {
+ pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ return -ENODEV;
+ }
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (br_spec) {
+ nla_for_each_nested(attr, br_spec, rem) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+ have_flags = true;
+ flags = nla_get_u16(attr);
+ break;
+ }
+ }
+ }
+
+ oflags = flags;
+
+ if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
+ if (!dev->master ||
+ !dev->master->netdev_ops->ndo_bridge_setlink) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
+ if (err)
+ goto out;
+
+ flags &= ~BRIDGE_FLAGS_MASTER;
+ }
+
+ if ((flags & BRIDGE_FLAGS_SELF)) {
+ if (!dev->netdev_ops->ndo_bridge_setlink)
+ err = -EOPNOTSUPP;
+ else
+ err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
+
+ if (!err)
+ flags &= ~BRIDGE_FLAGS_SELF;
+ }
+
+ if (have_flags)
+ memcpy(nla_data(attr), &flags, sizeof(flags));
+ /* Generate event to notify upper layer of bridge change */
+ if (!err)
+ err = rtnl_bridge_notify(dev, oflags);
+out:
+ return err;
+}
+
/* Protected by RTNL sempahore. */
static struct rtattr **rta_buf;
static int rtattr_max;
@@ -2283,7 +2498,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && !capable(CAP_NET_ADMIN))
+ if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
@@ -2434,5 +2649,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+
+ rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+ rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
}
diff --git a/net/core/scm.c b/net/core/scm.c
index ab570841a532..57fb1ee6649f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if (!uid_valid(uid) || !gid_valid(gid))
return -EINVAL;
- if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
+ if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
- uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) &&
+ uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
- gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) {
+ gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4007c1437fda..880722e22cc5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb)
uarg = skb_shinfo(skb)->destructor_arg;
if (uarg->callback)
- uarg->callback(uarg);
+ uarg->callback(uarg, true);
}
if (skb_has_frag_list(skb))
@@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb)
EXPORT_SYMBOL(kfree_skb);
/**
+ * skb_tx_error - report an sk_buff xmit error
+ * @skb: buffer that triggered an error
+ *
+ * Report xmit error if a device callback is tracking this skb.
+ * skb must be freed afterwards.
+ */
+void skb_tx_error(struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ struct ubuf_info *uarg;
+
+ uarg = skb_shinfo(skb)->destructor_arg;
+ if (uarg->callback)
+ uarg->callback(uarg, false);
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ }
+}
+EXPORT_SYMBOL(skb_tx_error);
+
+/**
* consume_skb - free an skbuff
* @skb: buffer to free
*
@@ -797,7 +817,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
for (i = 0; i < num_frags; i++)
skb_frag_unref(skb, i);
- uarg->callback(uarg);
+ uarg->callback(uarg, false);
/* skb frags point to kernel buffers */
for (i = num_frags - 1; i >= 0; i--) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 8a146cfcc366..d4f7b58b3866 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -515,7 +515,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
/* Sorry... */
ret = -EPERM;
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
goto out;
ret = -EINVAL;
@@ -696,7 +696,8 @@ set_rcvbuf:
break;
case SO_PRIORITY:
- if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+ if ((val >= 0 && val <= 6) ||
+ ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
else
ret = -EPERM;
@@ -813,7 +814,7 @@ set_rcvbuf:
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
case SO_MARK:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
else
sk->sk_mark = val;
@@ -1074,6 +1075,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
break;
+ case SO_BINDTODEVICE:
+ v.val = sk->sk_bound_dev_if;
+ break;
+ case SO_GET_FILTER:
+ len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+ if (len < 0)
+ return len;
+
+ goto lenout;
default:
return -ENOPROTOOPT;
}
@@ -1214,13 +1224,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
#ifdef CONFIG_CGROUPS
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk)
+void sock_update_classid(struct sock *sk, struct task_struct *task)
{
u32 classid;
- rcu_read_lock(); /* doing current task, which cannot vanish. */
- classid = task_cls_classid(current);
- rcu_read_unlock();
+ classid = task_cls_classid(task);
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
@@ -1263,7 +1271,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, get_net(net));
atomic_set(&sk->sk_wmem_alloc, 1);
- sock_update_classid(sk);
+ sock_update_classid(sk, current);
sock_update_netprioidx(sk, current);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a7c36845b123..d1b08045a9df 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net)
goto err_dup;
tbl[0].data = &net->core.sysctl_somaxconn;
+
+ /* Don't export any sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns) {
+ tbl[0].procname = NULL;
+ }
}
net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);