summaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c622
1 files changed, 440 insertions, 182 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..ba3b7ea5ebb3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
+#include <linux/if_macvlan.h>
#include "net-sysfs.h"
@@ -1203,7 +1204,7 @@ void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
call_netdevice_notifiers(NETDEV_CHANGE, dev);
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
}
EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1294,7 @@ int dev_open(struct net_device *dev)
if (ret < 0)
return ret;
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
call_netdevice_notifiers(NETDEV_UP, dev);
return ret;
@@ -1307,7 +1308,7 @@ static int __dev_close_many(struct list_head *head)
ASSERT_RTNL();
might_sleep();
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1324,7 @@ static int __dev_close_many(struct list_head *head)
dev_deactivate_many(head);
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
const struct net_device_ops *ops = dev->netdev_ops;
/*
@@ -1351,7 +1352,7 @@ static int __dev_close(struct net_device *dev)
/* Temporarily disable netpoll until the interface is down */
netpoll_rx_disable(dev);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
retval = __dev_close_many(&single);
list_del(&single);
@@ -1362,21 +1363,20 @@ static int __dev_close(struct net_device *dev)
static int dev_close_many(struct list_head *head)
{
struct net_device *dev, *tmp;
- LIST_HEAD(tmp_list);
- list_for_each_entry_safe(dev, tmp, head, unreg_list)
+ /* Remove the devices that don't need to be closed */
+ list_for_each_entry_safe(dev, tmp, head, close_list)
if (!(dev->flags & IFF_UP))
- list_move(&dev->unreg_list, &tmp_list);
+ list_del_init(&dev->close_list);
__dev_close_many(head);
- list_for_each_entry(dev, head, unreg_list) {
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ list_for_each_entry_safe(dev, tmp, head, close_list) {
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
call_netdevice_notifiers(NETDEV_DOWN, dev);
+ list_del_init(&dev->close_list);
}
- /* rollback_registered_many needs the complete original list */
- list_splice(&tmp_list, head);
return 0;
}
@@ -1397,7 +1397,7 @@ int dev_close(struct net_device *dev)
/* Block netpoll rx while the interface is going down */
netpoll_rx_disable(dev);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
dev_close_many(&single);
list_del(&single);
@@ -1425,6 +1425,10 @@ void dev_disable_lro(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
+ /* the same for macvlan devices */
+ if (netif_is_macvlan(dev))
+ dev = macvlan_dev_real_dev(dev);
+
dev->wanted_features &= ~NETIF_F_LRO;
netdev_update_features(dev);
@@ -1691,13 +1695,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
kfree_skb(skb);
return NET_RX_DROP;
}
- skb->protocol = eth_type_trans(skb, dev);
- /* eth_type_trans() can set pkt_type.
- * call skb_scrub_packet() after it to clear pkt_type _after_ calling
- * eth_type_trans().
- */
skb_scrub_packet(skb, true);
+ skb->protocol = eth_type_trans(skb, dev);
return netif_rx(skb);
}
@@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return new_map;
}
-int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+ u16 index)
{
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
struct xps_map *map, *new_map;
@@ -2377,6 +2378,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
}
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+ SKB_GSO_CB(skb)->encap_level = 0;
+
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
@@ -2536,7 +2539,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq)
+ struct netdev_queue *txq, void *accel_priv)
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
@@ -2602,9 +2605,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
dev_queue_xmit_nit(skb, dev);
skb_len = skb->len;
- rc = ops->ndo_start_xmit(skb, dev);
+ if (accel_priv)
+ rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
+ else
+ rc = ops->ndo_start_xmit(skb, dev);
+
trace_net_dev_xmit(skb, rc, dev, skb_len);
- if (rc == NETDEV_TX_OK)
+ if (rc == NETDEV_TX_OK && txq)
txq_trans_update(txq);
return rc;
}
@@ -2620,7 +2627,10 @@ gso:
dev_queue_xmit_nit(nskb, dev);
skb_len = nskb->len;
- rc = ops->ndo_start_xmit(nskb, dev);
+ if (accel_priv)
+ rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
+ else
+ rc = ops->ndo_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
@@ -2645,6 +2655,7 @@ out_kfree_skb:
out:
return rc;
}
+EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@@ -2852,7 +2863,7 @@ int dev_queue_xmit(struct sk_buff *skb)
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
- rc = dev_hard_start_xmit(skb, dev, txq);
+ rc = dev_hard_start_xmit(skb, dev, txq, NULL);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
@@ -4373,42 +4384,40 @@ struct netdev_adjacent {
/* upper master flag, there can only be one master device per list */
bool master;
- /* indicates that this dev is our first-level lower/upper device */
- bool neighbour;
-
/* counter for the number of times this device was added to us */
u16 ref_nr;
+ /* private field for the users */
+ void *private;
+
struct list_head list;
struct rcu_head rcu;
};
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
- struct net_device *adj_dev,
- bool upper)
+static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
+ struct net_device *adj_dev,
+ struct list_head *adj_list)
{
struct netdev_adjacent *adj;
- struct list_head *dev_list;
-
- dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
- list_for_each_entry(adj, dev_list, list) {
+ list_for_each_entry_rcu(adj, adj_list, list) {
if (adj->dev == adj_dev)
return adj;
}
return NULL;
}
-static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
- struct net_device *udev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+ struct net_device *adj_dev,
+ struct list_head *adj_list)
{
- return __netdev_find_adj(dev, udev, true);
-}
+ struct netdev_adjacent *adj;
-static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
- struct net_device *ldev)
-{
- return __netdev_find_adj(dev, ldev, false);
+ list_for_each_entry(adj, adj_list, list) {
+ if (adj->dev == adj_dev)
+ return adj;
+ }
+ return NULL;
}
/**
@@ -4425,7 +4434,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_upper(dev, upper_dev);
+ return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -4440,7 +4449,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
- return !list_empty(&dev->upper_dev_list);
+ return !list_empty(&dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_any_upper_dev);
@@ -4457,10 +4466,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
ASSERT_RTNL();
- if (list_empty(&dev->upper_dev_list))
+ if (list_empty(&dev->adj_list.upper))
return NULL;
- upper = list_first_entry(&dev->upper_dev_list,
+ upper = list_first_entry(&dev->adj_list.upper,
struct netdev_adjacent, list);
if (likely(upper->master))
return upper->dev;
@@ -4468,15 +4477,26 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
-/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+void *netdev_adjacent_get_private(struct list_head *adj_list)
+{
+ struct netdev_adjacent *adj;
+
+ adj = list_entry(adj_list, struct netdev_adjacent, list);
+
+ return adj->private;
+}
+EXPORT_SYMBOL(netdev_adjacent_get_private);
+
+/**
+ * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
* @dev: device
* @iter: list_head ** of the current position
*
* Gets the next device from the dev's upper list, starting from iter
* position. The caller must hold RCU read lock.
*/
-struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *upper;
@@ -4484,14 +4504,71 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
- if (&upper->list == &dev->upper_dev_list)
+ if (&upper->list == &dev->all_adj_list.upper)
return NULL;
*iter = &upper->list;
return upper->dev;
}
-EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
+
+/**
+ * netdev_lower_get_next_private - Get the next ->private from the
+ * lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold either hold the
+ * RTNL lock or its own locking that guarantees that the neighbour lower
+ * list will remain unchainged.
+ */
+void *netdev_lower_get_next_private(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry(*iter, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ if (iter)
+ *iter = lower->list.next;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private);
+
+/**
+ * netdev_lower_get_next_private_rcu - Get the next ->private from the
+ * lower neighbour list, RCU
+ * variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ if (iter)
+ *iter = &lower->list;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
@@ -4504,7 +4581,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
struct netdev_adjacent *upper;
- upper = list_first_or_null_rcu(&dev->upper_dev_list,
+ upper = list_first_or_null_rcu(&dev->adj_list.upper,
struct netdev_adjacent, list);
if (upper && likely(upper->master))
return upper->dev;
@@ -4514,15 +4591,16 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
- bool neighbour, bool master,
- bool upper)
+ struct list_head *dev_list,
+ void *private, bool master)
{
struct netdev_adjacent *adj;
+ char linkname[IFNAMSIZ+7];
+ int ret;
- adj = __netdev_find_adj(dev, adj_dev, upper);
+ adj = __netdev_find_adj(dev, adj_dev, dev_list);
if (adj) {
- BUG_ON(neighbour);
adj->ref_nr++;
return 0;
}
@@ -4533,124 +4611,179 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->dev = adj_dev;
adj->master = master;
- adj->neighbour = neighbour;
adj->ref_nr = 1;
-
+ adj->private = private;
dev_hold(adj_dev);
- pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
- adj_dev->name, upper ? "upper" : "lower", dev->name,
- adj_dev->name);
- if (!upper) {
- list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
- return 0;
+ pr_debug("dev_hold for %s, because of link added from %s to %s\n",
+ adj_dev->name, dev->name, adj_dev->name);
+
+ if (dev_list == &dev->adj_list.lower) {
+ sprintf(linkname, "lower_%s", adj_dev->name);
+ ret = sysfs_create_link(&(dev->dev.kobj),
+ &(adj_dev->dev.kobj), linkname);
+ if (ret)
+ goto free_adj;
+ } else if (dev_list == &dev->adj_list.upper) {
+ sprintf(linkname, "upper_%s", adj_dev->name);
+ ret = sysfs_create_link(&(dev->dev.kobj),
+ &(adj_dev->dev.kobj), linkname);
+ if (ret)
+ goto free_adj;
}
- /* Ensure that master upper link is always the first item in list. */
- if (master)
- list_add_rcu(&adj->list, &dev->upper_dev_list);
- else
- list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+ /* Ensure that master link is always the first item in list. */
+ if (master) {
+ ret = sysfs_create_link(&(dev->dev.kobj),
+ &(adj_dev->dev.kobj), "master");
+ if (ret)
+ goto remove_symlinks;
+
+ list_add_rcu(&adj->list, dev_list);
+ } else {
+ list_add_tail_rcu(&adj->list, dev_list);
+ }
return 0;
-}
-static inline int __netdev_upper_dev_insert(struct net_device *dev,
- struct net_device *udev,
- bool master, bool neighbour)
-{
- return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
- true);
-}
+remove_symlinks:
+ if (dev_list == &dev->adj_list.lower) {
+ sprintf(linkname, "lower_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ } else if (dev_list == &dev->adj_list.upper) {
+ sprintf(linkname, "upper_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ }
-static inline int __netdev_lower_dev_insert(struct net_device *dev,
- struct net_device *ldev,
- bool neighbour)
-{
- return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
- false);
+free_adj:
+ kfree(adj);
+ dev_put(adj_dev);
+
+ return ret;
}
void __netdev_adjacent_dev_remove(struct net_device *dev,
- struct net_device *adj_dev, bool upper)
+ struct net_device *adj_dev,
+ struct list_head *dev_list)
{
struct netdev_adjacent *adj;
+ char linkname[IFNAMSIZ+7];
- if (upper)
- adj = __netdev_find_upper(dev, adj_dev);
- else
- adj = __netdev_find_lower(dev, adj_dev);
+ adj = __netdev_find_adj(dev, adj_dev, dev_list);
- if (!adj)
+ if (!adj) {
+ pr_err("tried to remove device %s from %s\n",
+ dev->name, adj_dev->name);
BUG();
+ }
if (adj->ref_nr > 1) {
+ pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
+ adj->ref_nr-1);
adj->ref_nr--;
return;
}
+ if (adj->master)
+ sysfs_remove_link(&(dev->dev.kobj), "master");
+
+ if (dev_list == &dev->adj_list.lower) {
+ sprintf(linkname, "lower_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ } else if (dev_list == &dev->adj_list.upper) {
+ sprintf(linkname, "upper_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ }
+
list_del_rcu(&adj->list);
- pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
- adj_dev->name, upper ? "upper" : "lower", dev->name,
- adj_dev->name);
+ pr_debug("dev_put for %s, because link removed from %s to %s\n",
+ adj_dev->name, dev->name, adj_dev->name);
dev_put(adj_dev);
kfree_rcu(adj, rcu);
}
-static inline void __netdev_upper_dev_remove(struct net_device *dev,
- struct net_device *udev)
-{
- return __netdev_adjacent_dev_remove(dev, udev, true);
-}
-
-static inline void __netdev_lower_dev_remove(struct net_device *dev,
- struct net_device *ldev)
-{
- return __netdev_adjacent_dev_remove(dev, ldev, false);
-}
-
-int __netdev_adjacent_dev_insert_link(struct net_device *dev,
- struct net_device *upper_dev,
- bool master, bool neighbour)
+int __netdev_adjacent_dev_link_lists(struct net_device *dev,
+ struct net_device *upper_dev,
+ struct list_head *up_list,
+ struct list_head *down_list,
+ void *private, bool master)
{
int ret;
- ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+ ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
+ master);
if (ret)
return ret;
- ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+ ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
+ false);
if (ret) {
- __netdev_upper_dev_remove(dev, upper_dev);
+ __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
return ret;
}
return 0;
}
-static inline int __netdev_adjacent_dev_link(struct net_device *dev,
- struct net_device *udev)
+int __netdev_adjacent_dev_link(struct net_device *dev,
+ struct net_device *upper_dev)
{
- return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+ return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ &dev->all_adj_list.upper,
+ &upper_dev->all_adj_list.lower,
+ NULL, false);
}
-static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
- struct net_device *udev,
- bool master)
+void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
+ struct net_device *upper_dev,
+ struct list_head *up_list,
+ struct list_head *down_list)
{
- return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+ __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+ __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
}
void __netdev_adjacent_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_upper_dev_remove(dev, upper_dev);
- __netdev_lower_dev_remove(upper_dev, dev);
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ &dev->all_adj_list.upper,
+ &upper_dev->all_adj_list.lower);
}
+int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+ struct net_device *upper_dev,
+ void *private, bool master)
+{
+ int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+
+ if (ret)
+ return ret;
+
+ ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ &dev->adj_list.upper,
+ &upper_dev->adj_list.lower,
+ private, master);
+ if (ret) {
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ &dev->adj_list.upper,
+ &upper_dev->adj_list.lower);
+}
static int __netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev, bool master)
+ struct net_device *upper_dev, bool master,
+ void *private)
{
struct netdev_adjacent *i, *j, *to_i, *to_j;
int ret = 0;
@@ -4661,26 +4794,29 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_upper(upper_dev, dev))
+ if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_upper(dev, upper_dev))
+ if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+ master);
if (ret)
return ret;
/* Now that we linked these devs, make all the upper_dev's
- * upper_dev_list visible to every dev's lower_dev_list and vice
+ * all_adj_list.upper visible to every dev's all_adj_list.lower an
* versa, and don't forget the devices itself. All of these
* links are non-neighbours.
*/
- list_for_each_entry(i, &dev->lower_dev_list, list) {
- list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+ list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
+ pr_debug("Interlinking %s with %s, non-neighbour\n",
+ i->dev->name, j->dev->name);
ret = __netdev_adjacent_dev_link(i->dev, j->dev);
if (ret)
goto rollback_mesh;
@@ -4688,14 +4824,18 @@ static int __netdev_upper_dev_link(struct net_device *dev,
}
/* add dev to every upper_dev's upper device */
- list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
+ pr_debug("linking %s's upper device %s with %s\n",
+ upper_dev->name, i->dev->name, dev->name);
ret = __netdev_adjacent_dev_link(dev, i->dev);
if (ret)
goto rollback_upper_mesh;
}
/* add upper_dev to every dev's lower device */
- list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+ pr_debug("linking %s's lower device %s with %s\n", dev->name,
+ i->dev->name, upper_dev->name);
ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
if (ret)
goto rollback_lower_mesh;
@@ -4706,7 +4846,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
rollback_lower_mesh:
to_i = i;
- list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
if (i == to_i)
break;
__netdev_adjacent_dev_unlink(i->dev, upper_dev);
@@ -4716,7 +4856,7 @@ rollback_lower_mesh:
rollback_upper_mesh:
to_i = i;
- list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
if (i == to_i)
break;
__netdev_adjacent_dev_unlink(dev, i->dev);
@@ -4727,8 +4867,8 @@ rollback_upper_mesh:
rollback_mesh:
to_i = i;
to_j = j;
- list_for_each_entry(i, &dev->lower_dev_list, list) {
- list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+ list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
if (i == to_i && j == to_j)
break;
__netdev_adjacent_dev_unlink(i->dev, j->dev);
@@ -4737,7 +4877,7 @@ rollback_mesh:
break;
}
- __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
return ret;
}
@@ -4755,7 +4895,7 @@ rollback_mesh:
int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, false);
+ return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -4773,10 +4913,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, true);
+ return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+ struct net_device *upper_dev,
+ void *private)
+{
+ return __netdev_upper_dev_link(dev, upper_dev, true, private);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
+
/**
* netdev_upper_dev_unlink - Removes a link to upper device
* @dev: device
@@ -4791,38 +4939,68 @@ void netdev_upper_dev_unlink(struct net_device *dev,
struct netdev_adjacent *i, *j;
ASSERT_RTNL();
- __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
* devices from all upper_dev's upper devices and vice
* versa, to maintain the graph relationship.
*/
- list_for_each_entry(i, &dev->lower_dev_list, list)
- list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+ list_for_each_entry(i, &dev->all_adj_list.lower, list)
+ list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
__netdev_adjacent_dev_unlink(i->dev, j->dev);
/* remove also the devices itself from lower/upper device
* list
*/
- list_for_each_entry(i, &dev->lower_dev_list, list)
+ list_for_each_entry(i, &dev->all_adj_list.lower, list)
__netdev_adjacent_dev_unlink(i->dev, upper_dev);
- list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+ list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
__netdev_adjacent_dev_unlink(dev, i->dev);
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+ struct net_device *lower_dev)
+{
+ struct netdev_adjacent *lower;
+
+ if (!lower_dev)
+ return NULL;
+ lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
+ if (!lower)
+ return NULL;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
+
+void *netdev_lower_dev_get_private(struct net_device *dev,
+ struct net_device *lower_dev)
+{
+ struct netdev_adjacent *lower;
+
+ if (!lower_dev)
+ return NULL;
+ lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+ if (!lower)
+ return NULL;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
- if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+ if (ops->ndo_change_rx_flags)
ops->ndo_change_rx_flags(dev, flags);
}
-static int __dev_set_promiscuity(struct net_device *dev, int inc)
+static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags;
kuid_t uid;
@@ -4865,6 +5043,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
dev_change_rx_flags(dev, IFF_PROMISC);
}
+ if (notify)
+ __dev_notify_flags(dev, old_flags, IFF_PROMISC);
return 0;
}
@@ -4884,7 +5064,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
unsigned int old_flags = dev->flags;
int err;
- err = __dev_set_promiscuity(dev, inc);
+ err = __dev_set_promiscuity(dev, inc, true);
if (err < 0)
return err;
if (dev->flags != old_flags)
@@ -4893,22 +5073,9 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
}
EXPORT_SYMBOL(dev_set_promiscuity);
-/**
- * dev_set_allmulti - update allmulti count on a device
- * @dev: device
- * @inc: modifier
- *
- * Add or remove reception of all multicast frames to a device. While the
- * count in the device remains above zero the interface remains listening
- * to all interfaces. Once it hits zero the device reverts back to normal
- * filtering operation. A negative @inc value is used to drop the counter
- * when releasing a resource needing all multicasts.
- * Return 0 if successful or a negative errno code on error.
- */
-
-int dev_set_allmulti(struct net_device *dev, int inc)
+static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
{
- unsigned int old_flags = dev->flags;
+ unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
ASSERT_RTNL();
@@ -4931,9 +5098,30 @@ int dev_set_allmulti(struct net_device *dev, int inc)
if (dev->flags ^ old_flags) {
dev_change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
+ if (notify)
+ __dev_notify_flags(dev, old_flags,
+ dev->gflags ^ old_gflags);
}
return 0;
}
+
+/**
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove reception of all multicast frames to a device. While the
+ * count in the device remains above zero the interface remains listening
+ * to all interfaces. Once it hits zero the device reverts back to normal
+ * filtering operation. A negative @inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ * Return 0 if successful or a negative errno code on error.
+ */
+
+int dev_set_allmulti(struct net_device *dev, int inc)
+{
+ return __dev_set_allmulti(dev, inc, true);
+}
EXPORT_SYMBOL(dev_set_allmulti);
/*
@@ -4958,10 +5146,10 @@ void __dev_set_rx_mode(struct net_device *dev)
* therefore calling __dev_set_promiscuity here is safe.
*/
if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
- __dev_set_promiscuity(dev, 1);
+ __dev_set_promiscuity(dev, 1, false);
dev->uc_promisc = true;
} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
- __dev_set_promiscuity(dev, -1);
+ __dev_set_promiscuity(dev, -1, false);
dev->uc_promisc = false;
}
}
@@ -5050,9 +5238,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
+ unsigned int old_flags = dev->flags;
dev->gflags ^= IFF_PROMISC;
- dev_set_promiscuity(dev, inc);
+
+ if (__dev_set_promiscuity(dev, inc, false) >= 0)
+ if (dev->flags != old_flags)
+ dev_set_rx_mode(dev);
}
/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
@@ -5063,16 +5255,20 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
dev->gflags ^= IFF_ALLMULTI;
- dev_set_allmulti(dev, inc);
+ __dev_set_allmulti(dev, inc, false);
}
return ret;
}
-void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
+ unsigned int gchanges)
{
unsigned int changes = dev->flags ^ old_flags;
+ if (gchanges)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
+
if (changes & IFF_UP) {
if (dev->flags & IFF_UP)
call_netdevice_notifiers(NETDEV_UP, dev);
@@ -5101,17 +5297,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
int dev_change_flags(struct net_device *dev, unsigned int flags)
{
int ret;
- unsigned int changes, old_flags = dev->flags;
+ unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
ret = __dev_change_flags(dev, flags);
if (ret < 0)
return ret;
- changes = old_flags ^ dev->flags;
- if (changes)
- rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
-
- __dev_notify_flags(dev, old_flags);
+ changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
+ __dev_notify_flags(dev, old_flags, changes);
return ret;
}
EXPORT_SYMBOL(dev_change_flags);
@@ -5247,15 +5440,18 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
+ dev_net(dev)->dev_unreg_count++;
}
static void rollback_registered_many(struct list_head *head)
{
struct net_device *dev, *tmp;
+ LIST_HEAD(close_head);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -5278,7 +5474,9 @@ static void rollback_registered_many(struct list_head *head)
}
/* If device is running, close it first. */
- dev_close_many(head);
+ list_for_each_entry(dev, head, unreg_list)
+ list_add_tail(&dev->close_list, &close_head);
+ dev_close_many(&close_head);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
@@ -5301,7 +5499,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
/*
* Flush the unicast and multicast chains
@@ -5700,7 +5898,7 @@ int register_netdevice(struct net_device *dev)
*/
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
out:
return ret;
@@ -5918,6 +6116,12 @@ void netdev_run_todo(void)
if (dev->destructor)
dev->destructor(dev);
+ /* Report a network device has been unregistered */
+ rtnl_lock();
+ dev_net(dev)->dev_unreg_count--;
+ __rtnl_unlock();
+ wake_up(&netdev_unregistering_wq);
+
/* Free network device */
kobject_put(&dev->dev.kobj);
}
@@ -6001,6 +6205,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+void netdev_freemem(struct net_device *dev)
+{
+ char *addr = (char *)dev - dev->padded;
+
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -6044,7 +6258,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kzalloc(alloc_size, GFP_KERNEL);
+ p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!p)
+ p = vzalloc(alloc_size);
if (!p)
return NULL;
@@ -6053,7 +6269,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
- goto free_p;
+ goto free_dev;
if (dev_addr_init(dev))
goto free_pcpu;
@@ -6068,9 +6284,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
+ INIT_LIST_HEAD(&dev->close_list);
INIT_LIST_HEAD(&dev->link_watch_list);
- INIT_LIST_HEAD(&dev->upper_dev_list);
- INIT_LIST_HEAD(&dev->lower_dev_list);
+ INIT_LIST_HEAD(&dev->adj_list.upper);
+ INIT_LIST_HEAD(&dev->adj_list.lower);
+ INIT_LIST_HEAD(&dev->all_adj_list.upper);
+ INIT_LIST_HEAD(&dev->all_adj_list.lower);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
@@ -6103,8 +6322,8 @@ free_pcpu:
kfree(dev->_rx);
#endif
-free_p:
- kfree(p);
+free_dev:
+ netdev_freemem(dev);
return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
@@ -6141,7 +6360,7 @@ void free_netdev(struct net_device *dev)
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
- kfree((char *)dev - dev->padded);
+ netdev_freemem(dev);
return;
}
@@ -6303,7 +6522,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
/*
* Flush the unicast and multicast chains
@@ -6342,7 +6561,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
synchronize_net();
err = 0;
@@ -6603,6 +6822,34 @@ static void __net_exit default_device_exit(struct net *net)
rtnl_unlock();
}
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+ /* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace in net_list.
+ */
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
@@ -6614,7 +6861,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- rtnl_lock();
+ /* To prevent network device cleanup code from dereferencing
+ * loopback devices or network devices that have been freed
+ * wait here for all pending unregistrations to complete,
+ * before unregistring the loopback device and allowing the
+ * network namespace be freed.
+ *
+ * The netdev todo list containing all network devices
+ * unregistrations that happen in default_device_exit_batch
+ * will run in the rtnl_unlock() at the end of
+ * default_device_exit_batch.
+ */
+ rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops)