summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-02-18 16:48:34 -0500
committerDavid S. Miller <davem@davemloft.net>2014-02-18 16:48:34 -0500
commit82f148e9926399b4a7f1fad2dff0cd811a0a52fc (patch)
treee5c6f38906a666ffc5b16e25f595069cceca1f3f
parent814ce14806e9c8ea4b54ed766d3363fd86e931c0 (diff)
parent49f17de72174ceb340a96996e14058e8f6ff951d (diff)
downloadlinux-82f148e9926399b4a7f1fad2dff0cd811a0a52fc.tar.gz
linux-82f148e9926399b4a7f1fad2dff0cd811a0a52fc.tar.bz2
linux-82f148e9926399b4a7f1fad2dff0cd811a0a52fc.zip
Merge branch 'bonding'
Veaceslav Falico says: ==================== bonding: add an option to rely on unvalidated arp packets v4 -> v5: Again per Nik's advise correct the bond_opts restrictions for arp_validate - set it the same as arp_interval. v3 -> v4: Per Nikolay's advise, remove the new bond_opts restriction on modes setting for arp_validate. v2 -> v3: Per Jay's advise, use the 'filter' keyword instead of 'arp' one, and use his text for documentation. Also, rebase on the latest net-next. Sorry for the delay, didn't manage to send it before net-next was closed. v1 -> v2: Don't remove the 'all traffic' functionality - rather, add new arp_validate options to specify that we want *only* unvalidated arps. Currently, if arp_validate is off (0), slave_last_rx() returns the slave->dev->last_rx, which is always updated on *any* packet received by slave, and not only arps. This means that, if the validation of arps is off, we're treating *any* incoming packet as a proof of slave being up, and not only arps. This might seem logical at the first glance, however it can cause a lot of troubles and false-positives, one example would be: The arp_ip_target is NOT accessible, however someone in the broadcast domain spams with any broadcast traffic. This way bonding will be tricked that the slave is still up (as in - can access arp_ip_target), while it's not. The net_device->last_rx is already used in a lot of drivers (even though the comment states to NOT do it :)), and it's also ugly to modify it from bonding. However, some loadbalance setups might rely on the fact that even non-arp traffic is a sign of slave being up - and we definitely can't break anyones config - so an extension to arp_validate is needed. So, to fix this, add an option for the user to specify if he wants to filter out non-arp traffic on unvalidated slaves, remove the last_rx from bonding, *always* call bond_arp_rcv() in slave's rx_handler (which is bond_handle_frame), and if we spot an arp there with this option on - update the slave->last_arp_rx - and use it instead of net_device->last_rx. Finally, rename last_arp_rx to last_rx to reflect the changes. Also rename slave->jiffies to ->last_link_up, to reflect better its meaning, add the new option's documentation and update the arp_validate one to be a bit more descriptive. ==================== Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/bonding.txt96
-rw-r--r--drivers/net/bonding/bond_main.c56
-rw-r--r--drivers/net/bonding/bond_options.c19
-rw-r--r--drivers/net/bonding/bonding.h26
-rw-r--r--include/linux/netdevice.h8
5 files changed, 119 insertions, 86 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 5cdb22971d19..a383c00392d0 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -270,16 +270,15 @@ arp_ip_target
arp_validate
Specifies whether or not ARP probes and replies should be
- validated in the active-backup mode. This causes the ARP
- monitor to examine the incoming ARP requests and replies, and
- only consider a slave to be up if it is receiving the
- appropriate ARP traffic.
+ validated in any mode that supports arp monitoring, or whether
+ non-ARP traffic should be filtered (disregarded) for link
+ monitoring purposes.
Possible values are:
none or 0
- No validation is performed. This is the default.
+ No validation or filtering is performed.
active or 1
@@ -293,31 +292,68 @@ arp_validate
Validation is performed for all slaves.
- For the active slave, the validation checks ARP replies to
- confirm that they were generated by an arp_ip_target. Since
- backup slaves do not typically receive these replies, the
- validation performed for backup slaves is on the ARP request
- sent out via the active slave. It is possible that some
- switch or network configurations may result in situations
- wherein the backup slaves do not receive the ARP requests; in
- such a situation, validation of backup slaves must be
- disabled.
-
- The validation of ARP requests on backup slaves is mainly
- helping bonding to decide which slaves are more likely to
- work in case of the active slave failure, it doesn't really
- guarantee that the backup slave will work if it's selected
- as the next active slave.
-
- This option is useful in network configurations in which
- multiple bonding hosts are concurrently issuing ARPs to one or
- more targets beyond a common switch. Should the link between
- the switch and target fail (but not the switch itself), the
- probe traffic generated by the multiple bonding instances will
- fool the standard ARP monitor into considering the links as
- still up. Use of the arp_validate option can resolve this, as
- the ARP monitor will only consider ARP requests and replies
- associated with its own instance of bonding.
+ filter or 4
+
+ Filtering is applied to all slaves. No validation is
+ performed.
+
+ filter_active or 5
+
+ Filtering is applied to all slaves, validation is performed
+ only for the active slave.
+
+ filter_backup or 6
+
+ Filtering is applied to all slaves, validation is performed
+ only for backup slaves.
+
+ Validation:
+
+ Enabling validation causes the ARP monitor to examine the incoming
+ ARP requests and replies, and only consider a slave to be up if it
+ is receiving the appropriate ARP traffic.
+
+ For an active slave, the validation checks ARP replies to confirm
+ that they were generated by an arp_ip_target. Since backup slaves
+ do not typically receive these replies, the validation performed
+ for backup slaves is on the broadcast ARP request sent out via the
+ active slave. It is possible that some switch or network
+ configurations may result in situations wherein the backup slaves
+ do not receive the ARP requests; in such a situation, validation
+ of backup slaves must be disabled.
+
+ The validation of ARP requests on backup slaves is mainly helping
+ bonding to decide which slaves are more likely to work in case of
+ the active slave failure, it doesn't really guarantee that the
+ backup slave will work if it's selected as the next active slave.
+
+ Validation is useful in network configurations in which multiple
+ bonding hosts are concurrently issuing ARPs to one or more targets
+ beyond a common switch. Should the link between the switch and
+ target fail (but not the switch itself), the probe traffic
+ generated by the multiple bonding instances will fool the standard
+ ARP monitor into considering the links as still up. Use of
+ validation can resolve this, as the ARP monitor will only consider
+ ARP requests and replies associated with its own instance of
+ bonding.
+
+ Filtering:
+
+ Enabling filtering causes the ARP monitor to only use incoming ARP
+ packets for link availability purposes. Arriving packets that are
+ not ARPs are delivered normally, but do not count when determining
+ if a slave is available.
+
+ Filtering operates by only considering the reception of ARP
+ packets (any ARP packet, regardless of source or destination) when
+ determining if a slave has received traffic for link availability
+ purposes.
+
+ Filtering is useful in network configurations in which significant
+ levels of third party broadcast traffic would fool the standard
+ ARP monitor into considering the links as still up. Use of
+ filtering can resolve this, as only ARP traffic is considered for
+ link availability purposes.
This option was added in bonding version 3.1.0.
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3bce855e627b..ac4a1b88115e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -798,7 +798,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
return;
if (new_active) {
- new_active->jiffies = jiffies;
+ new_active->last_link_up = jiffies;
if (new_active->link == BOND_LINK_BACK) {
if (USES_PRIMARY(bond->params.mode)) {
@@ -1115,9 +1115,6 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
slave = bond_slave_get_rcu(skb->dev);
bond = slave->bond;
- if (bond->params.arp_interval)
- slave->dev->last_rx = jiffies;
-
recv_probe = ACCESS_ONCE(bond->recv_probe);
if (recv_probe) {
ret = recv_probe(skb, bond, slave);
@@ -1400,10 +1397,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_update_speed_duplex(new_slave);
- new_slave->last_arp_rx = jiffies -
+ new_slave->last_rx = jiffies -
(msecs_to_jiffies(bond->params.arp_interval) + 1);
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
- new_slave->target_last_arp_rx[i] = new_slave->last_arp_rx;
+ new_slave->target_last_arp_rx[i] = new_slave->last_rx;
if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
@@ -1447,7 +1444,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
}
if (new_slave->link != BOND_LINK_DOWN)
- new_slave->jiffies = jiffies;
+ new_slave->last_link_up = jiffies;
pr_debug("Initial state of slave_dev is BOND_LINK_%s\n",
new_slave->link == BOND_LINK_DOWN ? "DOWN" :
(new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
@@ -1894,7 +1891,7 @@ static int bond_miimon_inspect(struct bonding *bond)
* recovered before downdelay expired
*/
slave->link = BOND_LINK_UP;
- slave->jiffies = jiffies;
+ slave->last_link_up = jiffies;
pr_info("%s: link status up again after %d ms for interface %s\n",
bond->dev->name,
(bond->params.downdelay - slave->delay) *
@@ -1969,7 +1966,7 @@ static void bond_miimon_commit(struct bonding *bond)
case BOND_LINK_UP:
slave->link = BOND_LINK_UP;
- slave->jiffies = jiffies;
+ slave->last_link_up = jiffies;
if (bond->params.mode == BOND_MODE_8023AD) {
/* prevent it from being the active one */
@@ -2245,7 +2242,7 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
pr_debug("bva: sip %pI4 not found in targets\n", &sip);
return;
}
- slave->last_arp_rx = jiffies;
+ slave->last_rx = jiffies;
slave->target_last_arp_rx[i] = jiffies;
}
@@ -2255,15 +2252,16 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
struct arphdr *arp = (struct arphdr *)skb->data;
unsigned char *arp_ptr;
__be32 sip, tip;
- int alen;
+ int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
- if (skb->protocol != __cpu_to_be16(ETH_P_ARP))
+ if (!slave_do_arp_validate(bond, slave)) {
+ if ((slave_do_arp_validate_only(bond, slave) && is_arp) ||
+ !slave_do_arp_validate_only(bond, slave))
+ slave->last_rx = jiffies;
return RX_HANDLER_ANOTHER;
-
- read_lock(&bond->lock);
-
- if (!slave_do_arp_validate(bond, slave))
- goto out_unlock;
+ } else if (!is_arp) {
+ return RX_HANDLER_ANOTHER;
+ }
alen = arp_hdr_len(bond->dev);
@@ -2314,11 +2312,10 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
bond_validate_arp(bond, slave, sip, tip);
else if (bond->curr_active_slave &&
time_after(slave_last_rx(bond, bond->curr_active_slave),
- bond->curr_active_slave->jiffies))
+ bond->curr_active_slave->last_link_up))
bond_validate_arp(bond, slave, tip, sip);
out_unlock:
- read_unlock(&bond->lock);
if (arp != (struct arphdr *)skb->data)
kfree(arp);
return RX_HANDLER_ANOTHER;
@@ -2361,9 +2358,9 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
oldcurrent = ACCESS_ONCE(bond->curr_active_slave);
/* see if any of the previous devices are up now (i.e. they have
* xmt and rcv traffic). the curr_active_slave does not come into
- * the picture unless it is null. also, slave->jiffies is not needed
- * here because we send an arp on each slave and give a slave as
- * long as it needs to get the tx/rx within the delta.
+ * the picture unless it is null. also, slave->last_link_up is not
+ * needed here because we send an arp on each slave and give a slave
+ * as long as it needs to get the tx/rx within the delta.
* TODO: what about up/down delay in arp mode? it wasn't here before
* so it can wait
*/
@@ -2372,7 +2369,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
if (slave->link != BOND_LINK_UP) {
if (bond_time_in_interval(bond, trans_start, 1) &&
- bond_time_in_interval(bond, slave->dev->last_rx, 1)) {
+ bond_time_in_interval(bond, slave->last_rx, 1)) {
slave->link = BOND_LINK_UP;
slave_state_changed = 1;
@@ -2401,7 +2398,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
* if we don't know our ip yet
*/
if (!bond_time_in_interval(bond, trans_start, 2) ||
- !bond_time_in_interval(bond, slave->dev->last_rx, 2)) {
+ !bond_time_in_interval(bond, slave->last_rx, 2)) {
slave->link = BOND_LINK_DOWN;
slave_state_changed = 1;
@@ -2489,7 +2486,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
* active. This avoids bouncing, as the last receive
* times need a full ARP monitor cycle to be updated.
*/
- if (bond_time_in_interval(bond, slave->jiffies, 2))
+ if (bond_time_in_interval(bond, slave->last_link_up, 2))
continue;
/*
@@ -2690,7 +2687,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
new_slave->link = BOND_LINK_BACK;
bond_set_slave_active_flags(new_slave);
bond_arp_send_all(bond, new_slave);
- new_slave->jiffies = jiffies;
+ new_slave->last_link_up = jiffies;
rcu_assign_pointer(bond->current_arp_slave, new_slave);
rtnl_unlock();
@@ -3060,8 +3057,7 @@ static int bond_open(struct net_device *bond_dev)
if (bond->params.arp_interval) { /* arp interval, in milliseconds. */
queue_delayed_work(bond->wq, &bond->arp_work, 0);
- if (bond->params.arp_validate)
- bond->recv_probe = bond_arp_rcv;
+ bond->recv_probe = bond_arp_rcv;
}
if (bond->params.mode == BOND_MODE_8023AD) {
@@ -4186,10 +4182,6 @@ static int bond_check_params(struct bond_params *params)
}
if (arp_validate) {
- if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
- pr_err("arp_validate only supported in active-backup mode\n");
- return -EINVAL;
- }
if (!arp_interval) {
pr_err("arp_validate requires arp_interval\n");
return -EINVAL;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index f3eb44d2e231..5f997b9af54d 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -47,11 +47,14 @@ static struct bond_opt_value bond_xmit_hashtype_tbl[] = {
};
static struct bond_opt_value bond_arp_validate_tbl[] = {
- { "none", BOND_ARP_VALIDATE_NONE, BOND_VALFLAG_DEFAULT},
- { "active", BOND_ARP_VALIDATE_ACTIVE, 0},
- { "backup", BOND_ARP_VALIDATE_BACKUP, 0},
- { "all", BOND_ARP_VALIDATE_ALL, 0},
- { NULL, -1, 0},
+ { "none", BOND_ARP_VALIDATE_NONE, BOND_VALFLAG_DEFAULT},
+ { "active", BOND_ARP_VALIDATE_ACTIVE, 0},
+ { "backup", BOND_ARP_VALIDATE_BACKUP, 0},
+ { "all", BOND_ARP_VALIDATE_ALL, 0},
+ { "filter", BOND_ARP_FILTER, 0},
+ { "filter_active", BOND_ARP_FILTER_ACTIVE, 0},
+ { "filter_backup", BOND_ARP_FILTER_BACKUP, 0},
+ { NULL, -1, 0},
};
static struct bond_opt_value bond_arp_all_targets_tbl[] = {
@@ -151,7 +154,8 @@ static struct bond_option bond_opts[] = {
.id = BOND_OPT_ARP_VALIDATE,
.name = "arp_validate",
.desc = "validate src/dst of ARP probes",
- .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP)),
+ .unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) |
+ BIT(BOND_MODE_ALB),
.values = bond_arp_validate_tbl,
.set = bond_option_arp_validate_set
},
@@ -809,8 +813,7 @@ int bond_option_arp_interval_set(struct bonding *bond,
cancel_delayed_work_sync(&bond->arp_work);
} else {
/* arp_validate can be set only in active-backup mode */
- if (bond->params.arp_validate)
- bond->recv_probe = bond_arp_rcv;
+ bond->recv_probe = bond_arp_rcv;
cancel_delayed_work_sync(&bond->mii_work);
queue_delayed_work(bond->wq, &bond->arp_work, 0);
}
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 86ccfb9f71cc..430362891d0d 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -188,8 +188,9 @@ struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */
int delay;
- unsigned long jiffies;
- unsigned long last_arp_rx;
+ /* all three in jiffies */
+ unsigned long last_link_up;
+ unsigned long last_rx;
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */
s8 new_link;
@@ -342,6 +343,11 @@ static inline bool bond_is_active_slave(struct slave *slave)
#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP)
#define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \
BOND_ARP_VALIDATE_BACKUP)
+#define BOND_ARP_FILTER (BOND_ARP_VALIDATE_ALL + 1)
+#define BOND_ARP_FILTER_ACTIVE (BOND_ARP_VALIDATE_ACTIVE | \
+ BOND_ARP_FILTER)
+#define BOND_ARP_FILTER_BACKUP (BOND_ARP_VALIDATE_BACKUP | \
+ BOND_ARP_FILTER)
static inline int slave_do_arp_validate(struct bonding *bond,
struct slave *slave)
@@ -349,6 +355,12 @@ static inline int slave_do_arp_validate(struct bonding *bond,
return bond->params.arp_validate & (1 << bond_slave_state(slave));
}
+static inline int slave_do_arp_validate_only(struct bonding *bond,
+ struct slave *slave)
+{
+ return bond->params.arp_validate & BOND_ARP_FILTER;
+}
+
/* Get the oldest arp which we've received on this slave for bond's
* arp_targets.
*/
@@ -368,14 +380,10 @@ static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond,
static inline unsigned long slave_last_rx(struct bonding *bond,
struct slave *slave)
{
- if (slave_do_arp_validate(bond, slave)) {
- if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL)
- return slave_oldest_target_arp_rx(bond, slave);
- else
- return slave->last_arp_rx;
- }
+ if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL)
+ return slave_oldest_target_arp_rx(bond, slave);
- return slave->dev->last_rx;
+ return slave->last_rx;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 430c51aed6a4..891432a994c0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1312,13 +1312,7 @@ struct net_device {
/*
* Cache lines mostly used on receive path (including eth_type_trans())
*/
- unsigned long last_rx; /* Time of last Rx
- * This should not be set in
- * drivers, unless really needed,
- * because network stack (bonding)
- * use it if/when necessary, to
- * avoid dirtying this cache line.
- */
+ unsigned long last_rx; /* Time of last Rx */
/* Interface address info used in eth_type_trans() */
unsigned char *dev_addr; /* hw address, (before bcast