summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.txt8
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/net/dsa/mv88e6xxx/Kconfig11
-rw-r--r--drivers/net/dsa/mv88e6xxx/Makefile4
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c467
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c471
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h88
-rw-r--r--drivers/net/dsa/mv88e6xxx/mv88e6xxx.h6
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c66
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_core.c11
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_ethtool.c495
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c360
-rw-r--r--drivers/net/ethernet/cavium/liquidio/liquidio_common.h12
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c50
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h14
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.c13
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_main.h8
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h10
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_nic.c27
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_nic.h4
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c167
-rw-r--r--drivers/net/ethernet/cavium/liquidio/response_manager.c9
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c64
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c254
-rw-r--r--drivers/net/ethernet/ti/cpmac.c5
-rw-r--r--drivers/net/usb/hso.c118
-rw-r--r--drivers/net/usb/smsc95xx.c109
-rw-r--r--drivers/net/usb/smsc95xx.h8
-rw-r--r--drivers/net/vxlan.c3
-rw-r--r--include/linux/rhashtable.h70
-rw-r--r--include/net/netfilter/nf_conntrack.h56
-rw-r--r--include/net/netfilter/nf_conntrack_core.h3
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h17
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h8
-rw-r--r--include/net/netfilter/nf_log.h3
-rw-r--r--include/net/netfilter/nf_tables.h3
-rw-r--r--include/net/netns/conntrack.h8
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h65
-rw-r--r--lib/rhashtable.c10
-rw-r--r--net/bridge/netfilter/nf_log_bridge.c3
-rw-r--r--net/core/net_namespace.c37
-rw-r--r--net/ipv4/netfilter/Kconfig11
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c70
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c492
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c39
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c5
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c3
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c3
-rw-r--r--net/netfilter/Kconfig22
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c229
-rw-r--r--net/netfilter/nf_conntrack_ecache.c22
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c44
-rw-r--r--net/netfilter/nf_conntrack_pptp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c81
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c39
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c89
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c131
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c53
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c3
-rw-r--r--net/netfilter/nf_conntrack_standalone.c3
-rw-r--r--net/netfilter/nf_log.c8
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c203
-rw-r--r--net/netfilter/nft_hash.c417
-rw-r--r--net/netfilter/nft_numgen.c192
-rw-r--r--net/netfilter/nft_quota.c121
-rw-r--r--net/netfilter/nft_set_hash.c404
-rw-r--r--net/netfilter/nft_set_rbtree.c (renamed from net/netfilter/nft_rbtree.c)12
-rw-r--r--net/netfilter/xt_conntrack.c4
-rw-r--r--net/netfilter/xt_physdev.c4
-rw-r--r--net/openvswitch/conntrack.c2
-rw-r--r--net/tipc/bcast.c8
-rw-r--r--net/tipc/bcast.h4
-rw-r--r--net/tipc/link.c131
-rw-r--r--net/tipc/link.h6
-rw-r--r--net/tipc/msg.h10
-rw-r--r--net/tipc/node.c32
-rw-r--r--net/tipc/node.h11
91 files changed, 3820 insertions, 2345 deletions
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index cccd945fc45b..95383c5131fc 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -3,8 +3,12 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC)
The device node has following properties.
Required properties:
- - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac",
- "rockchip,rk3368-gmac"
+ - compatible: should be "rockchip,<name>-gamc"
+ "rockchip,rk3228-gmac": found on RK322x SoCs
+ "rockchip,rk3288-gmac": found on RK3288 SoCs
+ "rockchip,rk3366-gmac": found on RK3366 SoCs
+ "rockchip,rk3368-gmac": found on RK3368 SoCs
+ "rockchip,rk3399-gmac": found on RK3399 SoCs
- reg: addresses and length of the register sets for the device.
- interrupts: Should contain the GMAC interrupts.
- interrupt-names: Should contain the interrupt names "macirq".
diff --git a/MAINTAINERS b/MAINTAINERS
index 734e03556e2c..9a1783547baf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12285,6 +12285,7 @@ F: drivers/net/usb/smsc75xx.*
USB SMSC95XX ETHERNET DRIVER
M: Steve Glendinning <steve.glendinning@shawell.net>
+M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/usb/smsc95xx.*
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index ac77737bbd87..486668813e15 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -6,3 +6,14 @@ config NET_DSA_MV88E6XXX
help
This driver adds support for most of the Marvell 88E6xxx models of
Ethernet switch chips, except 88E6060.
+
+config NET_DSA_MV88E6XXX_GLOBAL2
+ bool "Switch Global 2 Registers support"
+ default y
+ depends on NET_DSA_MV88E6XXX
+ help
+ This registers set at internal SMI address 0x1C provides extended
+ features like EEPROM interface, trunking, cross-chip setup, etc.
+
+ It is required on most chips. If the chip you compile the support for
+ doesn't have such registers set, say N here. In doubt, say Y.
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 6e29a75ee2f7..697103934317 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -1 +1,3 @@
-obj-$(CONFIG_NET_DSA_MV88E6XXX) += chip.o
+obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
+mv88e6xxx-objs := chip.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d6b0f78e9598..70a812d159c9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -29,7 +29,9 @@
#include <linux/phy.h>
#include <net/dsa.h>
#include <net/switchdev.h>
+
#include "mv88e6xxx.h"
+#include "global2.h"
static void assert_reg_lock(struct mv88e6xxx_chip *chip)
{
@@ -182,8 +184,7 @@ static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = {
.write = mv88e6xxx_smi_multi_chip_write,
};
-static int mv88e6xxx_read(struct mv88e6xxx_chip *chip,
- int addr, int reg, u16 *val)
+int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val)
{
int err;
@@ -199,8 +200,7 @@ static int mv88e6xxx_read(struct mv88e6xxx_chip *chip,
return 0;
}
-static int mv88e6xxx_write(struct mv88e6xxx_chip *chip,
- int addr, int reg, u16 val)
+int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
{
int err;
@@ -306,8 +306,7 @@ static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
reg, val);
}
-static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg,
- u16 mask)
+int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
{
int i;
@@ -330,8 +329,7 @@ static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg,
}
/* Indirect write to single pointer-data register with an Update bit */
-static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
- u16 update)
+int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update)
{
u16 val;
int err;
@@ -2878,330 +2876,6 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
return 0;
}
-static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
- int target, int port)
-{
- u16 val = (target << 8) | (port & 0xf);
-
- return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val);
-}
-
-static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
-{
- int target, port;
- int err;
-
- /* Initialize the routing port to the 32 possible target devices */
- for (target = 0; target < 32; ++target) {
- port = 0xf;
-
- if (target < DSA_MAX_SWITCHES) {
- port = chip->ds->rtable[target];
- if (port == DSA_RTABLE_NONE)
- port = 0xf;
- }
-
- err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
- if (err)
- break;
- }
-
- return err;
-}
-
-static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
- bool hask, u16 mask)
-{
- const u16 port_mask = BIT(chip->info->num_ports) - 1;
- u16 val = (num << 12) | (mask & port_mask);
-
- if (hask)
- val |= GLOBAL2_TRUNK_MASK_HASK;
-
- return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val);
-}
-
-static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
- u16 map)
-{
- const u16 port_mask = BIT(chip->info->num_ports) - 1;
- u16 val = (id << 11) | (map & port_mask);
-
- return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val);
-}
-
-static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
-{
- const u16 port_mask = BIT(chip->info->num_ports) - 1;
- int i, err;
-
- /* Clear all eight possible Trunk Mask vectors */
- for (i = 0; i < 8; ++i) {
- err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask);
- if (err)
- return err;
- }
-
- /* Clear all sixteen possible Trunk ID routing vectors */
- for (i = 0; i < 16; ++i) {
- err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
-{
- int port, err;
-
- /* Init all Ingress Rate Limit resources of all ports */
- for (port = 0; port < chip->info->num_ports; ++port) {
- /* XXX newer chips (like 88E6390) have different 2-bit ops */
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
- GLOBAL2_IRL_CMD_OP_INIT_ALL |
- (port << 8));
- if (err)
- break;
-
- /* Wait for the operation to complete */
- err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
- GLOBAL2_IRL_CMD_BUSY);
- if (err)
- break;
- }
-
- return err;
-}
-
-/* Indirect write to the Switch MAC/WoL/WoF register */
-static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
- unsigned int pointer, u8 data)
-{
- u16 val = (pointer << 8) | data;
-
- return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val);
-}
-
-static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
-{
- int i, err;
-
- for (i = 0; i < 6; i++) {
- err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]);
- if (err)
- break;
- }
-
- return err;
-}
-
-static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
- u8 data)
-{
- u16 val = (pointer << 8) | (data & 0x7);
-
- return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val);
-}
-
-static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
-{
- int i, err;
-
- /* Clear all sixteen possible Priority Override entries */
- for (i = 0; i < 16; i++) {
- err = mv88e6xxx_g2_pot_write(chip, i, 0);
- if (err)
- break;
- }
-
- return err;
-}
-
-static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
-{
- return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
- GLOBAL2_EEPROM_CMD_BUSY |
- GLOBAL2_EEPROM_CMD_RUNNING);
-}
-
-static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
-{
- int err;
-
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd);
- if (err)
- return err;
-
- return mv88e6xxx_g2_eeprom_wait(chip);
-}
-
-static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip,
- u8 addr, u16 *data)
-{
- u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr;
- int err;
-
- err = mv88e6xxx_g2_eeprom_wait(chip);
- if (err)
- return err;
-
- err = mv88e6xxx_g2_eeprom_cmd(chip, cmd);
- if (err)
- return err;
-
- return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
-}
-
-static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
- u8 addr, u16 data)
-{
- u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr;
- int err;
-
- err = mv88e6xxx_g2_eeprom_wait(chip);
- if (err)
- return err;
-
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
- if (err)
- return err;
-
- return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
-}
-
-static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
-{
- return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD,
- GLOBAL2_SMI_PHY_CMD_BUSY);
-}
-
-static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
-{
- int err;
-
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd);
- if (err)
- return err;
-
- return mv88e6xxx_g2_smi_phy_wait(chip);
-}
-
-static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr,
- int reg, u16 *val)
-{
- u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
- int err;
-
- err = mv88e6xxx_g2_smi_phy_wait(chip);
- if (err)
- return err;
-
- err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
- if (err)
- return err;
-
- return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
-}
-
-static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr,
- int reg, u16 val)
-{
- u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
- int err;
-
- err = mv88e6xxx_g2_smi_phy_wait(chip);
- if (err)
- return err;
-
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
- if (err)
- return err;
-
- return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
-}
-
-static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = {
- .read = mv88e6xxx_g2_smi_phy_read,
- .write = mv88e6xxx_g2_smi_phy_write,
-};
-
-static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
- u16 reg;
- int err;
-
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
- /* Consider the frames with reserved multicast destination
- * addresses matching 01:80:c2:00:00:2x as MGMT.
- */
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X,
- 0xffff);
- if (err)
- return err;
- }
-
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) {
- /* Consider the frames with reserved multicast destination
- * addresses matching 01:80:c2:00:00:0x as MGMT.
- */
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X,
- 0xffff);
- if (err)
- return err;
- }
-
- /* Ignore removed tag data on doubly tagged packets, disable
- * flow control messages, force flow control priority to the
- * highest, and send all special multicast frames to the CPU
- * port at the highest priority.
- */
- reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4);
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
- mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
- reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7;
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg);
- if (err)
- return err;
-
- /* Program the DSA routing table. */
- err = mv88e6xxx_g2_set_device_mapping(chip);
- if (err)
- return err;
-
- /* Clear all trunk masks and mapping. */
- err = mv88e6xxx_g2_clear_trunk(chip);
- if (err)
- return err;
-
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) {
- /* Disable ingress rate limiting by resetting all per port
- * ingress rate limit resources to their initial state.
- */
- err = mv88e6xxx_g2_clear_irl(chip);
- if (err)
- return err;
- }
-
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
- /* Initialize Cross-chip Port VLAN Table to reset defaults */
- err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
- GLOBAL2_PVT_ADDR_OP_INIT_ONES);
- if (err)
- return err;
- }
-
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
- /* Clear the priority override table. */
- err = mv88e6xxx_g2_clear_pot(chip);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static int mv88e6xxx_setup(struct dsa_switch *ds)
{
struct mv88e6xxx_chip *chip = ds->priv;
@@ -3503,56 +3177,6 @@ static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds)
return chip->eeprom_len;
}
-static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip,
- struct ethtool_eeprom *eeprom, u8 *data)
-{
- unsigned int offset = eeprom->offset;
- unsigned int len = eeprom->len;
- u16 val;
- int err;
-
- eeprom->len = 0;
-
- if (offset & 1) {
- err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
- if (err)
- return err;
-
- *data++ = (val >> 8) & 0xff;
-
- offset++;
- len--;
- eeprom->len++;
- }
-
- while (len >= 2) {
- err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
- if (err)
- return err;
-
- *data++ = val & 0xff;
- *data++ = (val >> 8) & 0xff;
-
- offset += 2;
- len -= 2;
- eeprom->len += 2;
- }
-
- if (len) {
- err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
- if (err)
- return err;
-
- *data++ = val & 0xff;
-
- offset++;
- len--;
- eeprom->len++;
- }
-
- return 0;
-}
-
static int mv88e6xxx_get_eeprom(struct dsa_switch *ds,
struct ethtool_eeprom *eeprom, u8 *data)
{
@@ -3562,7 +3186,7 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds,
mutex_lock(&chip->reg_lock);
if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16))
- err = mv88e6xxx_get_eeprom16(chip, eeprom, data);
+ err = mv88e6xxx_g2_get_eeprom16(chip, eeprom, data);
else
err = -EOPNOTSUPP;
@@ -3576,72 +3200,6 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds,
return 0;
}
-static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip,
- struct ethtool_eeprom *eeprom, u8 *data)
-{
- unsigned int offset = eeprom->offset;
- unsigned int len = eeprom->len;
- u16 val;
- int err;
-
- /* Ensure the RO WriteEn bit is set */
- err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val);
- if (err)
- return err;
-
- if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN))
- return -EROFS;
-
- eeprom->len = 0;
-
- if (offset & 1) {
- err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
- if (err)
- return err;
-
- val = (*data++ << 8) | (val & 0xff);
-
- err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
- if (err)
- return err;
-
- offset++;
- len--;
- eeprom->len++;
- }
-
- while (len >= 2) {
- val = *data++;
- val |= *data++ << 8;
-
- err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
- if (err)
- return err;
-
- offset += 2;
- len -= 2;
- eeprom->len += 2;
- }
-
- if (len) {
- err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
- if (err)
- return err;
-
- val = (val & 0xff00) | *data++;
-
- err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
- if (err)
- return err;
-
- offset++;
- len--;
- eeprom->len++;
- }
-
- return 0;
-}
-
static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
struct ethtool_eeprom *eeprom, u8 *data)
{
@@ -3654,7 +3212,7 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
mutex_lock(&chip->reg_lock);
if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16))
- err = mv88e6xxx_set_eeprom16(chip, eeprom, data);
+ err = mv88e6xxx_g2_set_eeprom16(chip, eeprom, data);
else
err = -EOPNOTSUPP;
@@ -3886,6 +3444,10 @@ static int mv88e6xxx_detect(struct mv88e6xxx_chip *chip)
/* Update the compatible info with the probed one */
chip->info = info;
+ err = mv88e6xxx_g2_require(chip);
+ if (err)
+ return err;
+
dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n",
chip->info->prod_num, chip->info->name, rev);
@@ -3907,6 +3469,11 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev)
return chip;
}
+static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = {
+ .read = mv88e6xxx_g2_smi_phy_read,
+ .write = mv88e6xxx_g2_smi_phy_write,
+};
+
static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = {
.read = mv88e6xxx_read,
.write = mv88e6xxx_write,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
new file mode 100644
index 000000000000..99ed028298ac
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -0,0 +1,471 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C)
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "mv88e6xxx.h"
+#include "global2.h"
+
+/* Offset 0x06: Device Mapping Table register */
+
+static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
+ int target, int port)
+{
+ u16 val = (target << 8) | (port & 0xf);
+
+ return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val);
+}
+
+static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
+{
+ int target, port;
+ int err;
+
+ /* Initialize the routing port to the 32 possible target devices */
+ for (target = 0; target < 32; ++target) {
+ port = 0xf;
+
+ if (target < DSA_MAX_SWITCHES) {
+ port = chip->ds->rtable[target];
+ if (port == DSA_RTABLE_NONE)
+ port = 0xf;
+ }
+
+ err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/* Offset 0x07: Trunk Mask Table register */
+
+static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
+ bool hask, u16 mask)
+{
+ const u16 port_mask = BIT(chip->info->num_ports) - 1;
+ u16 val = (num << 12) | (mask & port_mask);
+
+ if (hask)
+ val |= GLOBAL2_TRUNK_MASK_HASK;
+
+ return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val);
+}
+
+/* Offset 0x08: Trunk Mapping Table register */
+
+static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
+ u16 map)
+{
+ const u16 port_mask = BIT(chip->info->num_ports) - 1;
+ u16 val = (id << 11) | (map & port_mask);
+
+ return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val);
+}
+
+static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
+{
+ const u16 port_mask = BIT(chip->info->num_ports) - 1;
+ int i, err;
+
+ /* Clear all eight possible Trunk Mask vectors */
+ for (i = 0; i < 8; ++i) {
+ err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask);
+ if (err)
+ return err;
+ }
+
+ /* Clear all sixteen possible Trunk ID routing vectors */
+ for (i = 0; i < 16; ++i) {
+ err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* Offset 0x09: Ingress Rate Command register
+ * Offset 0x0A: Ingress Rate Data register
+ */
+
+static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
+{
+ int port, err;
+
+ /* Init all Ingress Rate Limit resources of all ports */
+ for (port = 0; port < chip->info->num_ports; ++port) {
+ /* XXX newer chips (like 88E6390) have different 2-bit ops */
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
+ GLOBAL2_IRL_CMD_OP_INIT_ALL |
+ (port << 8));
+ if (err)
+ break;
+
+ /* Wait for the operation to complete */
+ err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
+ GLOBAL2_IRL_CMD_BUSY);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/* Offset 0x0D: Switch MAC/WoL/WoF register */
+
+static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
+ unsigned int pointer, u8 data)
+{
+ u16 val = (pointer << 8) | data;
+
+ return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val);
+}
+
+int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
+{
+ int i, err;
+
+ for (i = 0; i < 6; i++) {
+ err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/* Offset 0x0F: Priority Override Table */
+
+static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
+ u8 data)
+{
+ u16 val = (pointer << 8) | (data & 0x7);
+
+ return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val);
+}
+
+static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
+{
+ int i, err;
+
+ /* Clear all sixteen possible Priority Override entries */
+ for (i = 0; i < 16; i++) {
+ err = mv88e6xxx_g2_pot_write(chip, i, 0);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/* Offset 0x14: EEPROM Command
+ * Offset 0x15: EEPROM Data
+ */
+
+static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
+{
+ return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
+ GLOBAL2_EEPROM_CMD_BUSY |
+ GLOBAL2_EEPROM_CMD_RUNNING);
+}
+
+static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+ int err;
+
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_eeprom_wait(chip);
+}
+
+static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip,
+ u8 addr, u16 *data)
+{
+ u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr;
+ int err;
+
+ err = mv88e6xxx_g2_eeprom_wait(chip);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_g2_eeprom_cmd(chip, cmd);
+ if (err)
+ return err;
+
+ return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
+}
+
+static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
+ u8 addr, u16 data)
+{
+ u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr;
+ int err;
+
+ err = mv88e6xxx_g2_eeprom_wait(chip);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
+}
+
+int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ unsigned int offset = eeprom->offset;
+ unsigned int len = eeprom->len;
+ u16 val;
+ int err;
+
+ eeprom->len = 0;
+
+ if (offset & 1) {
+ err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+ if (err)
+ return err;
+
+ *data++ = (val >> 8) & 0xff;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ while (len >= 2) {
+ err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+ if (err)
+ return err;
+
+ *data++ = val & 0xff;
+ *data++ = (val >> 8) & 0xff;
+
+ offset += 2;
+ len -= 2;
+ eeprom->len += 2;
+ }
+
+ if (len) {
+ err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+ if (err)
+ return err;
+
+ *data++ = val & 0xff;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ return 0;
+}
+
+int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ unsigned int offset = eeprom->offset;
+ unsigned int len = eeprom->len;
+ u16 val;
+ int err;
+
+ /* Ensure the RO WriteEn bit is set */
+ err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val);
+ if (err)
+ return err;
+
+ if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN))
+ return -EROFS;
+
+ eeprom->len = 0;
+
+ if (offset & 1) {
+ err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+ if (err)
+ return err;
+
+ val = (*data++ << 8) | (val & 0xff);
+
+ err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+ if (err)
+ return err;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ while (len >= 2) {
+ val = *data++;
+ val |= *data++ << 8;
+
+ err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+ if (err)
+ return err;
+
+ offset += 2;
+ len -= 2;
+ eeprom->len += 2;
+ }
+
+ if (len) {
+ err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+ if (err)
+ return err;
+
+ val = (val & 0xff00) | *data++;
+
+ err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+ if (err)
+ return err;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ return 0;
+}
+
+/* Offset 0x18: SMI PHY Command Register
+ * Offset 0x19: SMI PHY Data Register
+ */
+
+static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
+{
+ return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD,
+ GLOBAL2_SMI_PHY_CMD_BUSY);
+}
+
+static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+ int err;
+
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_smi_phy_wait(chip);
+}
+
+int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg,
+ u16 *val)
+{
+ u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
+ int err;
+
+ err = mv88e6xxx_g2_smi_phy_wait(chip);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+ if (err)
+ return err;
+
+ return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+}
+
+int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg,
+ u16 val)
+{
+ u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
+ int err;
+
+ err = mv88e6xxx_g2_smi_phy_wait(chip);
+ if (err)
+ return err;
+
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+}
+
+int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
+{
+ u16 reg;
+ int err;
+
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
+ /* Consider the frames with reserved multicast destination
+ * addresses matching 01:80:c2:00:00:2x as MGMT.
+ */
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X,
+ 0xffff);
+ if (err)
+ return err;
+ }
+
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) {
+ /* Consider the frames with reserved multicast destination
+ * addresses matching 01:80:c2:00:00:0x as MGMT.
+ */
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X,
+ 0xffff);
+ if (err)
+ return err;
+ }
+
+ /* Ignore removed tag data on doubly tagged packets, disable
+ * flow control messages, force flow control priority to the
+ * highest, and send all special multicast frames to the CPU
+ * port at the highest priority.
+ */
+ reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4);
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
+ mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
+ reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7;
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg);
+ if (err)
+ return err;
+
+ /* Program the DSA routing table. */
+ err = mv88e6xxx_g2_set_device_mapping(chip);
+ if (err)
+ return err;
+
+ /* Clear all trunk masks and mapping. */
+ err = mv88e6xxx_g2_clear_trunk(chip);
+ if (err)
+ return err;
+
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) {
+ /* Disable ingress rate limiting by resetting all per port
+ * ingress rate limit resources to their initial state.
+ */
+ err = mv88e6xxx_g2_clear_irl(chip);
+ if (err)
+ return err;
+ }
+
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
+ /* Initialize Cross-chip Port VLAN Table to reset defaults */
+ err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
+ GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+ if (err)
+ return err;
+ }
+
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
+ /* Clear the priority override table. */
+ err = mv88e6xxx_g2_clear_pot(chip);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
new file mode 100644
index 000000000000..c4bb9035ee3a
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -0,0 +1,88 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C)
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_GLOBAL2_H
+#define _MV88E6XXX_GLOBAL2_H
+
+#include "mv88e6xxx.h"
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
+
+static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
+{
+ return 0;
+}
+
+int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg,
+ u16 *val);
+int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg,
+ u16 val);
+int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
+int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+ struct ethtool_eeprom *eeprom, u8 *data);
+int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+ struct ethtool_eeprom *eeprom, u8 *data);
+int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
+
+static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
+{
+ if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+ dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip,
+ int addr, int reg, u16 *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip,
+ int addr, int reg, u16 val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip,
+ u8 *addr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+ struct ethtool_eeprom *eeprom,
+ u8 *data)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+ struct ethtool_eeprom *eeprom,
+ u8 *data)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
+
+#endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index e157d4f69864..52f3f5231f51 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -718,4 +718,10 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
return (chip->info->flags & flags) == flags;
}
+int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
+int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
+int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
+ u16 update);
+int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask);
+
#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index 2d812064731a..bddb198c0b74 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -887,6 +887,67 @@ static irqreturn_t cn23xx_interrupt_handler(void *dev)
return IRQ_HANDLED;
}
+static void cn23xx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr,
+ u32 idx, int valid)
+{
+ u64 bar1;
+ u64 reg_adr;
+
+ if (!valid) {
+ reg_adr = lio_pci_readq(
+ oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+ WRITE_ONCE(bar1, reg_adr);
+ lio_pci_writeq(oct, (READ_ONCE(bar1) & 0xFFFFFFFEULL),
+ CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+ reg_adr = lio_pci_readq(
+ oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+ WRITE_ONCE(bar1, reg_adr);
+ return;
+ }
+
+ /* The PEM(0..3)_BAR1_INDEX(0..15)[ADDR_IDX]<23:4> stores
+ * bits <41:22> of the Core Addr
+ */
+ lio_pci_writeq(oct, (((core_addr >> 22) << 4) | PCI_BAR1_MASK),
+ CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+
+ WRITE_ONCE(bar1, lio_pci_readq(
+ oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)));
+}
+
+static void cn23xx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask)
+{
+ lio_pci_writeq(oct, mask,
+ CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+}
+
+static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx)
+{
+ return (u32)lio_pci_readq(
+ oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+}
+
+/* always call with lock held */
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+ u32 new_idx;
+ u32 last_done;
+ u32 pkt_in_done = readl(iq->inst_cnt_reg);
+
+ last_done = pkt_in_done - iq->pkt_in_done;
+ iq->pkt_in_done = pkt_in_done;
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index. The iq->reset_instr_cnt is always zero for
+ * cn23xx, so no extra adjustments are needed.
+ */
+ new_idx = (iq->octeon_read_index +
+ (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+ iq->max_count;
+
+ return new_idx;
+}
+
static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
{
struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
@@ -1063,6 +1124,11 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
oct->fn_list.soft_reset = cn23xx_pf_soft_reset;
oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs;
+ oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
+
+ oct->fn_list.bar1_idx_setup = cn23xx_bar1_idx_setup;
+ oct->fn_list.bar1_idx_write = cn23xx_bar1_idx_write;
+ oct->fn_list.bar1_idx_read = cn23xx_bar1_idx_read;
oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt;
oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt;
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
index 33b758992eb2..21b5c9051967 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -51,6 +51,8 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct);
int validate_cn23xx_pf_config_info(struct octeon_device *oct,
struct octeon_config *conf23xx);
+u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
+
void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
int cn23xx_fw_loaded(struct octeon_device *oct);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 809179c040a7..201eddb3013a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -156,14 +156,19 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
netdev->name, netdev->mtu,
nctrl->ncmd.s.param1);
- rtnl_lock();
netdev->mtu = nctrl->ncmd.s.param1;
- call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev);
- rtnl_unlock();
+ queue_delayed_work(lio->link_status_wq.wq,
+ &lio->link_status_wq.wk.work, 0);
break;
case OCTNET_CMD_GPIO_ACCESS:
netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
+
+ break;
+
+ case OCTNET_CMD_ID_ACTIVE:
+ netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
+
break;
case OCTNET_CMD_LRO_ENABLE:
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index f3ce7441cb5f..f163e0abbeb2 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -32,6 +32,7 @@
#include "octeon_network.h"
#include "cn66xx_regs.h"
#include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
static int octnet_get_link_stats(struct net_device *netdev);
@@ -75,6 +76,7 @@ enum {
#define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))
#define OCT_ETHTOOL_REGDUMP_LEN 4096
+#define OCT_ETHTOOL_REGDUMP_LEN_23XX (4096 * 11)
#define OCT_ETHTOOL_REGSVER 1
/* statistics of PF */
@@ -188,6 +190,10 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = {
"buffer_alloc_failure",
};
+/* LiquidIO driver private flags */
+static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = {
+};
+
#define OCTNIC_NCMD_AUTONEG_ON 0x1
#define OCTNIC_NCMD_PHY_ON 0x2
@@ -259,6 +265,13 @@ lio_ethtool_get_channels(struct net_device *dev,
max_tx = CFG_GET_IQ_MAX_Q(conf6x);
rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
+ } else if (OCTEON_CN23XX_PF(oct)) {
+ struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+ max_rx = CFG_GET_OQ_MAX_Q(conf23);
+ max_tx = CFG_GET_IQ_MAX_Q(conf23);
+ rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf23, lio->ifidx);
+ tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf23, lio->ifidx);
}
channel->max_rx = max_rx;
@@ -331,6 +344,32 @@ static int octnet_gpio_access(struct net_device *netdev, int addr, int val)
return 0;
}
+static int octnet_id_active(struct net_device *netdev, int val)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ int ret = 0;
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE;
+ nctrl.ncmd.s.param1 = val;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.wait_time = 100;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* Callback for when mdio command response arrives
*/
static void octnet_mdio_resp_callback(struct octeon_device *oct,
@@ -474,6 +513,11 @@ static int lio_set_phys_id(struct net_device *netdev,
&value);
if (ret)
return ret;
+ } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+ octnet_id_active(netdev, LED_IDENTIFICATION_ON);
+
+ /* returns 0 since updates are asynchronous */
+ return 0;
} else {
return -EINVAL;
}
@@ -519,7 +563,10 @@ static int lio_set_phys_id(struct net_device *netdev,
&lio->phy_beacon_val);
if (ret)
return ret;
+ } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+ octnet_id_active(netdev, LED_IDENTIFICATION_OFF);
+ return 0;
} else {
return -EINVAL;
}
@@ -548,6 +595,13 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS;
rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx);
tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
+ } else if (OCTEON_CN23XX_PF(oct)) {
+ struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+ tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS;
+ rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS;
+ rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx);
+ tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx);
}
if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) {
@@ -608,6 +662,69 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
pause->rx_pause = oct->rx_pause;
}
+static int
+lio_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+ /* Notes: Not supporting any auto negotiation in these
+ * drivers.
+ */
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ struct oct_link_info *linfo = &lio->linfo;
+
+ int ret = 0;
+
+ if (oct->chip_id != OCTEON_CN23XX_PF_VID)
+ return -EINVAL;
+
+ if (linfo->link.s.duplex == 0) {
+ /*no flow control for half duplex*/
+ if (pause->rx_pause || pause->tx_pause)
+ return -EINVAL;
+ }
+
+ /*do not support autoneg of link flow control*/
+ if (pause->autoneg == AUTONEG_ENABLE)
+ return -EINVAL;
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.wait_time = 100;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+ if (pause->rx_pause) {
+ /*enable rx pause*/
+ nctrl.ncmd.s.param1 = 1;
+ } else {
+ /*disable rx pause*/
+ nctrl.ncmd.s.param1 = 0;
+ }
+
+ if (pause->tx_pause) {
+ /*enable tx pause*/
+ nctrl.ncmd.s.param2 = 1;
+ } else {
+ /*disable tx pause*/
+ nctrl.ncmd.s.param2 = 0;
+ }
+
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n");
+ return -EINVAL;
+ }
+
+ oct->rx_pause = pause->rx_pause;
+ oct->tx_pause = pause->tx_pause;
+
+ return 0;
+}
+
static void
lio_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats __attribute__((unused)),
@@ -875,6 +992,27 @@ lio_get_ethtool_stats(struct net_device *netdev,
}
}
+static void lio_get_priv_flags_strings(struct lio *lio, u8 *data)
+{
+ struct octeon_device *oct_dev = lio->oct_dev;
+ int i;
+
+ switch (oct_dev->chip_id) {
+ case OCTEON_CN23XX_PF_VID:
+ for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) {
+ sprintf(data, "%s", oct_priv_flags_strings[i]);
+ data += ETH_GSTRING_LEN;
+ }
+ break;
+ case OCTEON_CN68XX:
+ case OCTEON_CN66XX:
+ break;
+ default:
+ netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+ break;
+ }
+}
+
static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct lio *lio = GET_LIO(netdev);
@@ -914,12 +1052,31 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
}
break;
+ case ETH_SS_PRIV_FLAGS:
+ lio_get_priv_flags_strings(lio, data);
+ break;
default:
netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n");
break;
}
}
+static int lio_get_priv_flags_ss_count(struct lio *lio)
+{
+ struct octeon_device *oct_dev = lio->oct_dev;
+
+ switch (oct_dev->chip_id) {
+ case OCTEON_CN23XX_PF_VID:
+ return ARRAY_SIZE(oct_priv_flags_strings);
+ case OCTEON_CN68XX:
+ case OCTEON_CN66XX:
+ return -EOPNOTSUPP;
+ default:
+ netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+ return -EOPNOTSUPP;
+ }
+}
+
static int lio_get_sset_count(struct net_device *netdev, int sset)
{
struct lio *lio = GET_LIO(netdev);
@@ -930,6 +1087,8 @@ static int lio_get_sset_count(struct net_device *netdev, int sset)
return (ARRAY_SIZE(oct_stats_strings) +
ARRAY_SIZE(oct_iq_stats_strings) * oct_dev->num_iqs +
ARRAY_SIZE(oct_droq_stats_strings) * oct_dev->num_oqs);
+ case ETH_SS_PRIV_FLAGS:
+ return lio_get_priv_flags_ss_count(lio);
default:
return -EOPNOTSUPP;
}
@@ -946,6 +1105,16 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
intrmod_cfg = &oct->intrmod;
switch (oct->chip_id) {
+ case OCTEON_CN23XX_PF_VID:
+ if (!intrmod_cfg->rx_enable) {
+ intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs;
+ intr_coal->rx_max_coalesced_frames =
+ intrmod_cfg->rx_frames;
+ }
+ if (!intrmod_cfg->tx_enable)
+ intr_coal->tx_max_coalesced_frames =
+ intrmod_cfg->tx_frames;
+ break;
case OCTEON_CN68XX:
case OCTEON_CN66XX: {
struct octeon_cn6xxx *cn6xxx =
@@ -981,7 +1150,15 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
intr_coal->rx_coalesce_usecs_low =
intrmod_cfg->rx_mintmr_trigger;
intr_coal->rx_max_coalesced_frames_low =
- intrmod_cfg->rx_mincnt_trigger;
+ intrmod_cfg->rx_mincnt_trigger;
+ }
+ if (OCTEON_CN23XX_PF(oct) &&
+ (intrmod_cfg->tx_enable)) {
+ intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable;
+ intr_coal->tx_max_coalesced_frames_high =
+ intrmod_cfg->tx_maxcnt_trigger;
+ intr_coal->tx_max_coalesced_frames_low =
+ intrmod_cfg->tx_mincnt_trigger;
}
return 0;
}
@@ -1058,11 +1235,11 @@ static void
octnet_nic_stats_callback(struct octeon_device *oct_dev,
u32 status, void *ptr)
{
- struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
- struct oct_nic_stats_resp *resp = (struct oct_nic_stats_resp *)
- sc->virtrptr;
- struct oct_nic_stats_ctrl *ctrl = (struct oct_nic_stats_ctrl *)
- sc->ctxptr;
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+ struct oct_nic_stats_resp *resp =
+ (struct oct_nic_stats_resp *)sc->virtrptr;
+ struct oct_nic_stats_ctrl *ctrl =
+ (struct oct_nic_stats_ctrl *)sc->ctxptr;
struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
@@ -1312,6 +1489,27 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
CFG_SET_OQ_INTR_PKT(cn6xxx->conf, rx_max_coalesced_frames);
break;
}
+ case OCTEON_CN23XX_PF_VID: {
+ int q_no;
+
+ if (!intr_coal->rx_max_coalesced_frames)
+ rx_max_coalesced_frames = oct->intrmod.rx_frames;
+ else
+ rx_max_coalesced_frames =
+ intr_coal->rx_max_coalesced_frames;
+ for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+ q_no += oct->sriov_info.pf_srn;
+ octeon_write_csr64(
+ oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+ (octeon_read_csr64(
+ oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) &
+ (0x3fffff00000000UL)) |
+ rx_max_coalesced_frames);
+ /*consider setting resend bit*/
+ }
+ oct->intrmod.rx_frames = rx_max_coalesced_frames;
+ break;
+ }
default:
return -EINVAL;
}
@@ -1344,6 +1542,27 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
CFG_SET_OQ_INTR_TIME(cn6xxx->conf, rx_coalesce_usecs);
break;
}
+ case OCTEON_CN23XX_PF_VID: {
+ u64 time_threshold;
+ int q_no;
+
+ if (!intr_coal->rx_coalesce_usecs)
+ rx_coalesce_usecs = oct->intrmod.rx_usecs;
+ else
+ rx_coalesce_usecs = intr_coal->rx_coalesce_usecs;
+ time_threshold =
+ cn23xx_pf_get_oq_ticks(oct, (u32)rx_coalesce_usecs);
+ for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+ q_no += oct->sriov_info.pf_srn;
+ octeon_write_csr64(oct,
+ CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+ (oct->intrmod.rx_frames |
+ (time_threshold << 32)));
+ /*consider writing to resend bit here*/
+ }
+ oct->intrmod.rx_usecs = rx_coalesce_usecs;
+ break;
+ }
default:
return -EINVAL;
}
@@ -1356,12 +1575,37 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal
__attribute__((unused)))
{
struct octeon_device *oct = lio->oct_dev;
+ u32 iq_intr_pkt;
+ void __iomem *inst_cnt_reg;
+ u64 val;
/* Config Cnt based interrupt values */
switch (oct->chip_id) {
case OCTEON_CN68XX:
case OCTEON_CN66XX:
break;
+ case OCTEON_CN23XX_PF_VID: {
+ int q_no;
+
+ if (!intr_coal->tx_max_coalesced_frames)
+ iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD &
+ CN23XX_PKT_IN_DONE_WMARK_MASK;
+ else
+ iq_intr_pkt = intr_coal->tx_max_coalesced_frames &
+ CN23XX_PKT_IN_DONE_WMARK_MASK;
+ for (q_no = 0; q_no < oct->num_iqs; q_no++) {
+ inst_cnt_reg = (oct->instr_queue[q_no])->inst_cnt_reg;
+ val = readq(inst_cnt_reg);
+ /*clear wmark and count.dont want to write count back*/
+ val = (val & 0xFFFF000000000000ULL) |
+ ((u64)iq_intr_pkt
+ << CN23XX_PKT_IN_DONE_WMARK_BIT_POS);
+ writeq(val, inst_cnt_reg);
+ /*consider setting resend bit*/
+ }
+ oct->intrmod.tx_frames = iq_intr_pkt;
+ break;
+ }
default:
return -EINVAL;
}
@@ -1397,6 +1641,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
return -EINVAL;
}
break;
+ case OCTEON_CN23XX_PF_VID:
+ break;
default:
return -EINVAL;
}
@@ -1539,9 +1785,237 @@ static int lio_nway_reset(struct net_device *netdev)
}
/* Return register dump len. */
-static int lio_get_regs_len(struct net_device *dev __attribute__((unused)))
+static int lio_get_regs_len(struct net_device *dev)
{
- return OCT_ETHTOOL_REGDUMP_LEN;
+ struct lio *lio = GET_LIO(dev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ switch (oct->chip_id) {
+ case OCTEON_CN23XX_PF_VID:
+ return OCT_ETHTOOL_REGDUMP_LEN_23XX;
+ default:
+ return OCT_ETHTOOL_REGDUMP_LEN;
+ }
+}
+
+static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct)
+{
+ u32 reg;
+ u8 pf_num = oct->pf_num;
+ int len = 0;
+ int i;
+
+ /* PCI Window Registers */
+
+ len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n");
+
+ /*0x29030 or 0x29040*/
+ reg = CN23XX_SLI_PKT_MAC_RINFO64(oct->pcie_port, oct->pf_num);
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT_MAC%d_PF%d_RINFO): %016llx\n",
+ reg, oct->pcie_port, oct->pf_num,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x27080 or 0x27090*/
+ reg = CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
+ len +=
+ sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_ENB): %016llx\n",
+ reg, oct->pcie_port, oct->pf_num,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x27000 or 0x27010*/
+ reg = CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num);
+ len +=
+ sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_SUM): %016llx\n",
+ reg, oct->pcie_port, oct->pf_num,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x29120*/
+ reg = 0x29120;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_MEM_CTL): %016llx\n", reg,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x27300*/
+ reg = 0x27300 + oct->pcie_port * CN23XX_MAC_INT_OFFSET +
+ (oct->pf_num) * CN23XX_PF_INT_OFFSET;
+ len += sprintf(
+ s + len, "\n[%08x] (SLI_MAC%d_PF%d_PKT_VF_INT): %016llx\n", reg,
+ oct->pcie_port, oct->pf_num, (u64)octeon_read_csr64(oct, reg));
+
+ /*0x27200*/
+ reg = 0x27200 + oct->pcie_port * CN23XX_MAC_INT_OFFSET +
+ (oct->pf_num) * CN23XX_PF_INT_OFFSET;
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_MAC%d_PF%d_PP_VF_INT): %016llx\n",
+ reg, oct->pcie_port, oct->pf_num,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*29130*/
+ reg = CN23XX_SLI_PKT_CNT_INT;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_CNT_INT): %016llx\n", reg,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x29140*/
+ reg = CN23XX_SLI_PKT_TIME_INT;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_TIME_INT): %016llx\n", reg,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x29160*/
+ reg = 0x29160;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_INT): %016llx\n", reg,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x29180*/
+ reg = CN23XX_SLI_OQ_WMARK;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_OUTPUT_WMARK): %016llx\n",
+ reg, (u64)octeon_read_csr64(oct, reg));
+
+ /*0x291E0*/
+ reg = CN23XX_SLI_PKT_IOQ_RING_RST;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_RING_RST): %016llx\n", reg,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x29210*/
+ reg = CN23XX_SLI_GBL_CONTROL;
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT_GBL_CONTROL): %016llx\n", reg,
+ (u64)octeon_read_csr64(oct, reg));
+
+ /*0x29220*/
+ reg = 0x29220;
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT_BIST_STATUS): %016llx\n",
+ reg, (u64)octeon_read_csr64(oct, reg));
+
+ /*PF only*/
+ if (pf_num == 0) {
+ /*0x29260*/
+ reg = CN23XX_SLI_OUT_BP_EN_W1S;
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT_OUT_BP_EN_W1S): %016llx\n",
+ reg, (u64)octeon_read_csr64(oct, reg));
+ } else if (pf_num == 1) {
+ /*0x29270*/
+ reg = CN23XX_SLI_OUT_BP_EN2_W1S;
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT_OUT_BP_EN2_W1S): %016llx\n",
+ reg, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_BUFF_INFO_SIZE(i);
+ len +=
+ sprintf(s + len, "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10040*/
+ for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_IQ_INSTR_COUNT64(i);
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10080*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_PKTS_CREDIT(i);
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10090*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_SIZE(i);
+ len += sprintf(
+ s + len, "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10050*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_PKT_CONTROL(i);
+ len += sprintf(
+ s + len,
+ "\n[%08x] (SLI_PKT%d__OUTPUT_CONTROL): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10070*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_BASE_ADDR64(i);
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x100a0*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_PKT_INT_LEVELS(i);
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x100b0*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_OQ_PKTS_SENT(i);
+ len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x100c0*/
+ for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+ reg = 0x100c0 + i * CN23XX_OQ_OFFSET;
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+
+ /*0x10000*/
+ for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_IQ_PKT_CONTROL64(i);
+ len += sprintf(
+ s + len,
+ "\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10010*/
+ for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_IQ_BASE_ADDR64(i);
+ len += sprintf(
+ s + len,
+ "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n", reg,
+ i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10020*/
+ for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_IQ_DOORBELL(i);
+ len += sprintf(
+ s + len,
+ "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10030*/
+ for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+ reg = CN23XX_SLI_IQ_SIZE(i);
+ len += sprintf(
+ s + len,
+ "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ /*0x10040*/
+ for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++)
+ reg = CN23XX_SLI_IQ_INSTR_COUNT64(i);
+ len += sprintf(s + len,
+ "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+ reg, i, (u64)octeon_read_csr64(oct, reg));
+ }
+
+ return len;
}
static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct)
@@ -1686,6 +2160,10 @@ static void lio_get_regs(struct net_device *dev,
regs->version = OCT_ETHTOOL_REGSVER;
switch (oct->chip_id) {
+ case OCTEON_CN23XX_PF_VID:
+ memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX);
+ len += cn23xx_read_csr_reg(regbuf + len, oct);
+ break;
case OCTEON_CN68XX:
case OCTEON_CN66XX:
memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN);
@@ -1727,6 +2205,7 @@ static const struct ethtool_ops lio_ethtool_ops = {
.get_strings = lio_get_strings,
.get_ethtool_stats = lio_get_ethtool_stats,
.get_pauseparam = lio_get_pauseparam,
+ .set_pauseparam = lio_set_pauseparam,
.get_regs_len = lio_get_regs_len,
.get_regs = lio_get_regs,
.get_msglevel = lio_get_msglevel,
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 866c075cc7ea..afc6f9dc8119 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include <linux/ptp_clock_kernel.h>
#include <net/vxlan.h>
+#include <linux/kthread.h>
#include "liquidio_common.h"
#include "octeon_droq.h"
#include "octeon_iq.h"
@@ -96,6 +97,14 @@ struct liquidio_if_cfg_resp {
u64 status;
};
+struct liquidio_rx_ctl_context {
+ int octeon_id;
+
+ wait_queue_head_t wc;
+
+ int cond;
+};
+
struct oct_link_status_resp {
u64 rh;
struct oct_link_info link_info;
@@ -857,6 +866,52 @@ static void print_link_info(struct net_device *netdev)
}
/**
+ * \brief Routine to notify MTU change
+ * @param work work_struct data structure
+ */
+static void octnet_link_status_change(struct work_struct *work)
+{
+ struct cavium_wk *wk = (struct cavium_wk *)work;
+ struct lio *lio = (struct lio *)wk->ctxptr;
+
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+ rtnl_unlock();
+}
+
+/**
+ * \brief Sets up the mtu status change work
+ * @param netdev network device
+ */
+static inline int setup_link_status_change_wq(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ lio->link_status_wq.wq = alloc_workqueue("link-status",
+ WQ_MEM_RECLAIM, 0);
+ if (!lio->link_status_wq.wq) {
+ dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
+ return -1;
+ }
+ INIT_DELAYED_WORK(&lio->link_status_wq.wk.work,
+ octnet_link_status_change);
+ lio->link_status_wq.wk.ctxptr = lio;
+
+ return 0;
+}
+
+static inline void cleanup_link_status_change_wq(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+
+ if (lio->link_status_wq.wq) {
+ cancel_delayed_work_sync(&lio->link_status_wq.wk.work);
+ destroy_workqueue(lio->link_status_wq.wq);
+ }
+}
+
+/**
* \brief Update link status
* @param netdev network device
* @param ls link status structure
@@ -894,8 +949,6 @@ static void update_txq_status(struct octeon_device *oct, int iq_num)
struct lio *lio;
struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
- /*octeon_update_iq_read_idx(oct, iq);*/
-
netdev = oct->props[iq->ifidx].netdev;
/* This is needed because the first IQ does not have
@@ -948,8 +1001,7 @@ int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
* \brief Droq packet processor sceduler
* @param oct octeon device
*/
-static
-void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
+static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
{
struct octeon_device_priv *oct_priv =
(struct octeon_device_priv *)oct->priv;
@@ -1133,6 +1185,102 @@ static int octeon_setup_interrupt(struct octeon_device *oct)
return 0;
}
+static int liquidio_watchdog(void *param)
+{
+ u64 wdog;
+ u16 mask_of_stuck_cores = 0;
+ u16 mask_of_crashed_cores = 0;
+ int core_num;
+ u8 core_is_stuck[LIO_MAX_CORES];
+ u8 core_crashed[LIO_MAX_CORES];
+ struct octeon_device *oct = param;
+
+ memset(core_is_stuck, 0, sizeof(core_is_stuck));
+ memset(core_crashed, 0, sizeof(core_crashed));
+
+ while (!kthread_should_stop()) {
+ mask_of_crashed_cores =
+ (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2);
+
+ for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) {
+ if (!core_is_stuck[core_num]) {
+ wdog = lio_pci_readq(oct, CIU3_WDOG(core_num));
+
+ /* look at watchdog state field */
+ wdog &= CIU3_WDOG_MASK;
+ if (wdog) {
+ /* this watchdog timer has expired */
+ core_is_stuck[core_num] =
+ LIO_MONITOR_WDOG_EXPIRE;
+ mask_of_stuck_cores |= (1 << core_num);
+ }
+ }
+
+ if (!core_crashed[core_num])
+ core_crashed[core_num] =
+ (mask_of_crashed_cores >> core_num) & 1;
+ }
+
+ if (mask_of_stuck_cores) {
+ for (core_num = 0; core_num < LIO_MAX_CORES;
+ core_num++) {
+ if (core_is_stuck[core_num] == 1) {
+ dev_err(&oct->pci_dev->dev,
+ "ERROR: Octeon core %d is stuck!\n",
+ core_num);
+ /* 2 means we have printk'd an error
+ * so no need to repeat the same printk
+ */
+ core_is_stuck[core_num] =
+ LIO_MONITOR_CORE_STUCK_MSGD;
+ }
+ }
+ }
+
+ if (mask_of_crashed_cores) {
+ for (core_num = 0; core_num < LIO_MAX_CORES;
+ core_num++) {
+ if (core_crashed[core_num] == 1) {
+ dev_err(&oct->pci_dev->dev,
+ "ERROR: Octeon core %d crashed! See oct-fwdump for details.\n",
+ core_num);
+ /* 2 means we have printk'd an error
+ * so no need to repeat the same printk
+ */
+ core_crashed[core_num] =
+ LIO_MONITOR_CORE_STUCK_MSGD;
+ }
+ }
+ }
+#ifdef CONFIG_MODULE_UNLOAD
+ if (mask_of_stuck_cores || mask_of_crashed_cores) {
+ /* make module refcount=0 so that rmmod will work */
+ long refcount;
+
+ refcount = module_refcount(THIS_MODULE);
+
+ while (refcount > 0) {
+ module_put(THIS_MODULE);
+ refcount = module_refcount(THIS_MODULE);
+ }
+
+ /* compensate for and withstand an unlikely (but still
+ * possible) race condition
+ */
+ while (refcount < 0) {
+ try_module_get(THIS_MODULE);
+ refcount = module_refcount(THIS_MODULE);
+ }
+ }
+#endif
+ /* sleep for two seconds */
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(2 * HZ);
+ }
+
+ return 0;
+}
+
/**
* \brief PCI probe handler
* @param pdev PCI device structure
@@ -1178,6 +1326,30 @@ liquidio_probe(struct pci_dev *pdev,
return -ENOMEM;
}
+ if (OCTEON_CN23XX_PF(oct_dev)) {
+ u64 scratch1;
+ u8 bus, device, function;
+
+ scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1);
+ if (!(scratch1 & 4ULL)) {
+ /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that
+ * the lio watchdog kernel thread is running for this
+ * NIC. Each NIC gets one watchdog kernel thread.
+ */
+ scratch1 |= 4ULL;
+ octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1,
+ scratch1);
+
+ bus = pdev->bus->number;
+ device = PCI_SLOT(pdev->devfn);
+ function = PCI_FUNC(pdev->devfn);
+ oct_dev->watchdog_task = kthread_create(
+ liquidio_watchdog, oct_dev,
+ "liowd/%02hhx:%02hhx.%hhx", bus, device, function);
+ wake_up_process(oct_dev->watchdog_task);
+ }
+ }
+
oct_dev->rx_pause = 1;
oct_dev->tx_pause = 1;
@@ -1294,16 +1466,15 @@ static void octeon_destroy_resources(struct octeon_device *oct)
octeon_delete_response_list(oct);
/* fallthrough */
- case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
- octeon_free_sc_buffer_pool(oct);
-
- /* fallthrough */
case OCT_DEV_INSTR_QUEUE_INIT_DONE:
for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
- if (!(oct->io_qmask.iq & (1ULL << i)))
+ if (!(oct->io_qmask.iq & BIT_ULL(i)))
continue;
octeon_delete_instr_queue(oct, i);
}
+ /* fallthrough */
+ case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
+ octeon_free_sc_buffer_pool(oct);
/* fallthrough */
case OCT_DEV_DISPATCH_INIT_DONE:
@@ -1332,23 +1503,89 @@ static void octeon_destroy_resources(struct octeon_device *oct)
}
/**
+ * \brief Callback for rx ctrl
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+ u32 status,
+ void *buf)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+ struct liquidio_rx_ctl_context *ctx;
+
+ ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+ oct = lio_get_device(ctx->octeon_id);
+ if (status)
+ dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+ CVM_CAST64(status));
+ WRITE_ONCE(ctx->cond, 1);
+
+ /* This barrier is required to be sure that the response has been
+ * written fully before waking up the handler
+ */
+ wmb();
+
+ wake_up_interruptible(&ctx->wc);
+}
+
+/**
* \brief Send Rx control command
* @param lio per-network private data
* @param start_stop whether to start or stop
*/
static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
{
- struct octnic_ctrl_pkt nctrl;
+ struct octeon_soft_command *sc;
+ struct liquidio_rx_ctl_context *ctx;
+ union octnet_cmd *ncmd;
+ int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+ struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+ int retval;
- memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+ if (oct->props[lio->ifidx].rx_on == start_stop)
+ return;
- nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL;
- nctrl.ncmd.s.param1 = start_stop;
- nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
- nctrl.netpndev = (u64)lio->netdev;
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+ 16, ctx_size);
+
+ ncmd = (union octnet_cmd *)sc->virtdptr;
+ ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+ WRITE_ONCE(ctx->cond, 0);
+ ctx->octeon_id = lio_get_device_id(oct);
+ init_waitqueue_head(&ctx->wc);
+
+ ncmd->u64 = 0;
+ ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+ ncmd->s.param1 = start_stop;
+
+ octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_CMD, 0, 0, 0);
- if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0)
+ sc->callback = rx_ctl_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = 5000;
+
+ retval = octeon_send_soft_command(oct, sc);
+ if (retval == IQ_SEND_FAILED) {
netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+ } else {
+ /* Sleep on a wait queue till the cond flag indicates that the
+ * response arrived or timed-out.
+ */
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+ return;
+ oct->props[lio->ifidx].rx_on = start_stop;
+ }
+
+ octeon_free_soft_command(oct, sc);
}
/**
@@ -1375,21 +1612,24 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n");
- send_rx_ctrl_cmd(lio, 0);
-
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
- txqs_stop(netdev);
+ liquidio_stop(netdev);
if (oct->props[lio->ifidx].napi_enabled == 1) {
list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
napi_disable(napi);
oct->props[lio->ifidx].napi_enabled = 0;
+
+ if (OCTEON_CN23XX_PF(oct))
+ oct->droq[0]->ops.poll_mode = 0;
}
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
unregister_netdev(netdev);
+ cleanup_link_status_change_wq(netdev);
+
delete_glists(lio);
free_netdev(netdev);
@@ -1442,6 +1682,9 @@ static void liquidio_remove(struct pci_dev *pdev)
dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n");
+ if (oct_dev->watchdog_task)
+ kthread_stop(oct_dev->watchdog_task);
+
if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP))
liquidio_stop_nic_module(oct_dev);
@@ -1941,7 +2184,7 @@ static void if_cfg_callback(struct octeon_device *oct,
struct liquidio_if_cfg_context *ctx;
resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
- ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
oct = lio_get_device(ctx->octeon_id);
if (resp->status)
@@ -2134,11 +2377,14 @@ static void napi_schedule_wrapper(void *param)
*/
static void liquidio_napi_drv_callback(void *arg)
{
+ struct octeon_device *oct;
struct octeon_droq *droq = arg;
int this_cpu = smp_processor_id();
- if (droq->cpu_id == this_cpu) {
- napi_schedule(&droq->napi);
+ oct = droq->oct_dev;
+
+ if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
+ napi_schedule_irqoff(&droq->napi);
} else {
struct call_single_data *csd = &droq->csd;
@@ -2267,6 +2513,14 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
}
+ if (OCTEON_CN23XX_PF(octeon_dev)) {
+ /* 23XX PF can receive control messages (via the first PF-owned
+ * droq) from the firmware even if the ethX interface is down,
+ * so that's why poll_mode must be off for the first droq.
+ */
+ octeon_dev->droq[0]->ops.poll_mode = 0;
+ }
+
/* set up IQs. */
for (q = 0; q < lio->linfo.num_txpciq; q++) {
num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf
@@ -2351,12 +2605,20 @@ static int liquidio_open(struct net_device *netdev)
napi_enable(napi);
oct->props[lio->ifidx].napi_enabled = 1;
+
+ if (OCTEON_CN23XX_PF(oct))
+ oct->droq[0]->ops.poll_mode = 1;
}
oct_ptp_open(netdev);
ifstate_set(lio, LIO_IFSTATE_RUNNING);
+ /* Ready for link status updates */
+ lio->intf_open = 1;
+
+ netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
+
if (OCTEON_CN23XX_PF(oct)) {
if (!oct->msix_on)
if (setup_tx_poll_fn(netdev))
@@ -2368,14 +2630,9 @@ static int liquidio_open(struct net_device *netdev)
start_txq(netdev);
- netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-
/* tell Octeon to start forwarding packets to host */
send_rx_ctrl_cmd(lio, 1);
- /* Ready for link status updates */
- lio->intf_open = 1;
-
dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
netdev->name);
@@ -2795,9 +3052,15 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
sc->callback_arg = finfo->skb;
sc->iq_no = ndata->q_no;
- len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz;
+ if (OCTEON_CN23XX_PF(oct))
+ len = (u32)((struct octeon_instr_ih3 *)
+ (&sc->cmd.cmd3.ih3))->dlengsz;
+ else
+ len = (u32)((struct octeon_instr_ih2 *)
+ (&sc->cmd.cmd2.ih2))->dlengsz;
ring_doorbell = 1;
+
retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
sc, len, ndata->reqtype);
@@ -2929,7 +3192,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_BUSY;
}
- ndata.cmd.cmd2.dptr = dptr;
+ if (OCTEON_CN23XX_PF(oct))
+ ndata.cmd.cmd3.dptr = dptr;
+ else
+ ndata.cmd.cmd2.dptr = dptr;
finfo->dptr = dptr;
ndata.reqtype = REQTYPE_NORESP_NET;
@@ -3004,15 +3270,23 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
g->sg_size, DMA_TO_DEVICE);
dptr = g->sg_dma_ptr;
- ndata.cmd.cmd2.dptr = dptr;
+ if (OCTEON_CN23XX_PF(oct))
+ ndata.cmd.cmd3.dptr = dptr;
+ else
+ ndata.cmd.cmd2.dptr = dptr;
finfo->dptr = dptr;
finfo->g = g;
ndata.reqtype = REQTYPE_NORESP_NET_SG;
}
- irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
- tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
+ if (OCTEON_CN23XX_PF(oct)) {
+ irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+ tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+ } else {
+ irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
+ tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
+ }
if (skb_shinfo(skb)->gso_size) {
tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
@@ -3505,7 +3779,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
/* Sleep on a wait queue till the cond flag indicates that the
* response arrived or timed-out.
*/
- sleep_cond(&ctx->wc, &ctx->cond);
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+ dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
+ goto setup_nic_wait_intr;
+ }
+
retval = resp->status;
if (retval) {
dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
@@ -3656,7 +3934,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
/* Register ethtool support */
liquidio_set_ethtool_ops(netdev);
- octeon_dev->priv_flags = 0x0;
+ if (lio->oct_dev->chip_id == OCTEON_CN23XX_PF_VID)
+ octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT;
+ else
+ octeon_dev->priv_flags = 0x0;
if (netdev->features & NETIF_F_LRO)
liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
@@ -3668,6 +3949,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
liquidio_set_feature(netdev,
OCTNET_CMD_VERBOSE_ENABLE, 0);
+ if (setup_link_status_change_wq(netdev))
+ goto setup_nic_dev_fail;
+
/* Register the network device with the OS */
if (register_netdev(netdev)) {
dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
@@ -3703,6 +3987,8 @@ setup_nic_dev_fail:
octeon_free_soft_command(octeon_dev, sc);
+setup_nic_wait_intr:
+
while (i--) {
dev_err(&octeon_dev->pci_dev->dev,
"NIC ifidx:%d Setup failed\n", i);
@@ -3732,8 +4018,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
/* run port_config command for each port */
oct->ifcount = num_nic_ports;
- memset(oct->props, 0,
- sizeof(struct octdev_props) * num_nic_ports);
+ memset(oct->props, 0, sizeof(struct octdev_props) * num_nic_ports);
for (i = 0; i < MAX_OCTEON_LINKS; i++)
oct->props[i].gmxport = -1;
@@ -3749,7 +4034,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
/* Initialize interrupt moderation params */
intrmod_cfg = &((struct octeon_device *)oct)->intrmod;
intrmod_cfg->rx_enable = 1;
- intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
+ intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR;
intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR;
intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER;
@@ -3761,6 +4046,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER;
intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
+ intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
return retval;
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 8ffd3b8c4d0f..0d990accb65e 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -232,6 +232,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_CMD_ADD_VLAN_FILTER 0x17
#define OCTNET_CMD_DEL_VLAN_FILTER 0x18
#define OCTNET_CMD_VXLAN_PORT_CONFIG 0x19
+
+#define OCTNET_CMD_ID_ACTIVE 0x1a
+
#define OCTNET_CMD_VXLAN_PORT_ADD 0x0
#define OCTNET_CMD_VXLAN_PORT_DEL 0x1
#define OCTNET_CMD_RXCSUM_ENABLE 0x0
@@ -310,6 +313,13 @@ union octnet_cmd {
#define OCTNET_CMD_SIZE (sizeof(union octnet_cmd))
+/*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+#define LIO_SOFTCMDRESP_IH2 40
+#define LIO_SOFTCMDRESP_IH3 (40 + 8)
+
+#define LIO_PCICMD_O2 24
+#define LIO_PCICMD_O3 (24 + 8)
+
/* Instruction Header(DPI) - for OCTEON-III models */
struct octeon_instr_ih3 {
#ifdef __BIG_ENDIAN_BITFIELD
@@ -828,6 +838,8 @@ struct oct_link_stats {
#define VITESSE_PHY_GPIO_DRIVEOFF 0x4
#define VITESSE_PHY_GPIO_HIGH 0x2
#define VITESSE_PHY_GPIO_LOW 0x3
+#define LED_IDENTIFICATION_ON 0x1
+#define LED_IDENTIFICATION_OFF 0x0
struct oct_mdio_cmd {
u64 op;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 52527638d413..586b68899b06 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -793,7 +793,6 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
union oct_txpciq txpciq;
int numa_node = cpu_to_node(iq_no % num_online_cpus());
- /* this causes queue 0 to be default queue */
if (OCTEON_CN6XXX(oct))
num_descs =
CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
@@ -816,6 +815,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
oct->instr_queue[0]->ifidx = 0;
txpciq.u64 = 0;
txpciq.s.q_no = iq_no;
+ txpciq.s.pkind = oct->pfvf_hsword.pkind;
txpciq.s.use_qpg = 0;
txpciq.s.qpg = 0;
if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
@@ -835,7 +835,6 @@ int octeon_setup_output_queues(struct octeon_device *oct)
u32 oq_no = 0;
int numa_node = cpu_to_node(oq_no % num_online_cpus());
- /* this causes queue 0 to be default queue */
if (OCTEON_CN6XXX(oct)) {
num_descs =
CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
@@ -863,10 +862,10 @@ int octeon_setup_output_queues(struct octeon_device *oct)
void octeon_set_io_queues_off(struct octeon_device *oct)
{
- /* Disable the i/p and o/p queues for this Octeon. */
-
- octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
- octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+ if (OCTEON_CN6XXX(oct)) {
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+ }
}
void octeon_set_droq_pkt_op(struct octeon_device *oct,
@@ -876,14 +875,16 @@ void octeon_set_droq_pkt_op(struct octeon_device *oct,
u32 reg_val = 0;
/* Disable the i/p and o/p queues for this Octeon. */
- reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
+ if (OCTEON_CN6XXX(oct)) {
+ reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
- if (enable)
- reg_val = reg_val | (1 << q_no);
- else
- reg_val = reg_val & (~(1 << q_no));
+ if (enable)
+ reg_val = reg_val | (1 << q_no);
+ else
+ reg_val = reg_val & (~(1 << q_no));
- octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val);
+ octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val);
+ }
}
int octeon_init_dispatch_list(struct octeon_device *oct)
@@ -1100,6 +1101,12 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
}
oct->fw_info.app_cap_flags = recv_pkt->rh.r_core_drv_init.app_cap_flags;
oct->fw_info.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode;
+ oct->pfvf_hsword.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode;
+
+ oct->pfvf_hsword.pkind = recv_pkt->rh.r_core_drv_init.pkind;
+
+ for (i = 0; i < oct->num_iqs; i++)
+ oct->instr_queue[i]->txpciq.s.pkind = oct->pfvf_hsword.pkind;
atomic_set(&oct->status, OCT_DEV_CORE_OK);
@@ -1294,17 +1301,36 @@ int lio_get_device_id(void *dev)
void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
{
+ u64 instr_cnt;
+ struct octeon_device *oct = NULL;
+
/* the whole thing needs to be atomic, ideally */
if (droq) {
spin_lock_bh(&droq->lock);
writel(droq->pkt_count, droq->pkts_sent_reg);
droq->pkt_count = 0;
spin_unlock_bh(&droq->lock);
+ oct = droq->oct_dev;
}
if (iq) {
spin_lock_bh(&iq->lock);
writel(iq->pkt_in_done, iq->inst_cnt_reg);
iq->pkt_in_done = 0;
spin_unlock_bh(&iq->lock);
+ oct = iq->oct_dev;
+ }
+ /*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough
+ *to trigger tx interrupts as well, if they are pending.
+ */
+ if (oct && OCTEON_CN23XX_PF(oct)) {
+ if (droq)
+ writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
+ /*we race with firmrware here. read and write the IN_DONE_CNTS*/
+ else if (iq) {
+ instr_cnt = readq(iq->inst_cnt_reg);
+ writeq(((instr_cnt & 0xFFFFFFFF00000000ULL) |
+ CN23XX_INTR_RESEND),
+ iq->inst_cnt_reg);
+ }
}
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 99fc1d899208..da15c2ae9330 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -275,6 +275,7 @@ struct octdev_props {
/* Each interface in the Octeon device has a network
* device pointer (used for OS specific calls).
*/
+ int rx_on;
int napi_enabled;
int gmxport;
struct net_device *netdev;
@@ -483,6 +484,8 @@ struct octeon_device {
/* private flags to control driver-specific features through ethtool */
u32 priv_flags;
+
+ void *watchdog_task;
};
#define OCT_DRV_ONLINE 1
@@ -752,8 +755,15 @@ enum {
OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */
};
-static inline void lio_set_priv_flag(struct octeon_device *octdev, u32 flag,
- u32 val)
+#define OCT_PRIV_FLAG_DEFAULT 0x0
+
+static inline u32 lio_get_priv_flag(struct octeon_device *octdev, u32 flag)
+{
+ return !!(octdev->priv_flags & (0x1 << flag));
+}
+
+static inline void lio_set_priv_flag(struct octeon_device *octdev,
+ u32 flag, u32 val)
{
if (val)
octdev->priv_flags |= (0x1 << flag);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 5dfc23d70d05..f60e5320daf4 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -31,6 +31,7 @@
#include "octeon_network.h"
#include "cn66xx_regs.h"
#include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
#define CVM_MIN(d1, d2) (((d1) < (d2)) ? (d1) : (d2))
#define CVM_MAX(d1, d2) (((d1) > (d2)) ? (d1) : (d2))
@@ -262,6 +263,11 @@ int octeon_init_droq(struct octeon_device *oct,
c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
c_refill_threshold =
(u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+ } else if (OCTEON_CN23XX_PF(oct)) {
+ struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+ c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+ c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
} else {
return 1;
}
@@ -567,7 +573,7 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct,
(unsigned int)rh->r.opcode,
(unsigned int)rh->r.subcode);
droq->stats.dropped_nodispatch++;
- } /* else (dispatch_fn ... */
+ }
return cnt;
}
@@ -881,8 +887,11 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
return 0;
}
break;
+ case OCTEON_CN23XX_PF_VID: {
+ lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+ }
+ break;
}
-
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index ebeef95ed9b0..366298f7bcb2 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -181,22 +181,26 @@ cnnic_numa_alloc_aligned_dma(u32 size,
#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
free_pages(orig_ptr, get_order(size))
-static inline void
+static inline int
sleep_cond(wait_queue_head_t *wait_queue, int *condition)
{
+ int errno = 0;
wait_queue_t we;
init_waitqueue_entry(&we, current);
add_wait_queue(wait_queue, &we);
while (!(READ_ONCE(*condition))) {
set_current_state(TASK_INTERRUPTIBLE);
- if (signal_pending(current))
+ if (signal_pending(current)) {
+ errno = -EINTR;
goto out;
+ }
schedule();
}
out:
set_current_state(TASK_RUNNING);
remove_wait_queue(wait_queue, &we);
+ return errno;
}
static inline void
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 7cfbdf9b039c..e5d1debd05ad 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -122,11 +122,21 @@ struct lio {
/* work queue for txq status */
struct cavium_wq txq_status_wq;
+
+ /* work queue for link status */
+ struct cavium_wq link_status_wq;
+
};
#define LIO_SIZE (sizeof(struct lio))
#define GET_LIO(netdev) ((struct lio *)netdev_priv(netdev))
+#define CIU3_WDOG(c) (0x1010000020000ULL + (c << 3))
+#define CIU3_WDOG_MASK 12ULL
+#define LIO_MONITOR_WDOG_EXPIRE 1
+#define LIO_MONITOR_CORE_STUCK_MSGD 2
+#define LIO_MAX_CORES 12
+
/**
* \brief Enable or disable feature
* @param netdev pointer to network device
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 0c4013de599a..40ac1fe88956 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -35,6 +35,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct,
u32 rdatasize)
{
struct octeon_soft_command *sc;
+ struct octeon_instr_ih3 *ih3;
struct octeon_instr_ih2 *ih2;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
@@ -51,10 +52,19 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct,
/* Add in the response related fields. Opcode and Param are already
* there.
*/
- ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
- irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
- ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+ if (OCTEON_CN23XX_PF(oct)) {
+ ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+ /*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+ ih3->fsz = LIO_SOFTCMDRESP_IH3;
+ } else {
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+ /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+ ih2->fsz = LIO_SOFTCMDRESP_IH2;
+ }
irh->rflag = 1; /* a response is required */
@@ -63,7 +73,10 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct,
*sc->status_word = COMPLETION_WORD_INIT;
- sc->cmd.cmd2.rptr = sc->dmarptr;
+ if (OCTEON_CN23XX_PF(oct))
+ sc->cmd.cmd3.rptr = sc->dmarptr;
+ else
+ sc->cmd.cmd2.rptr = sc->dmarptr;
sc->wait_time = 1000;
sc->timeout = jiffies + sc->wait_time;
@@ -179,8 +192,8 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
retval = octeon_send_soft_command(oct, sc);
if (retval == IQ_SEND_FAILED) {
octeon_free_soft_command(oct, sc);
- dev_err(&oct->pci_dev->dev, "%s soft command:%d send failed status: %x\n",
- __func__, nctrl->ncmd.s.cmd, retval);
+ dev_err(&oct->pci_dev->dev, "%s pf_num:%d soft command:%d send failed status: %x\n",
+ __func__, oct->pf_num, nctrl->ncmd.s.cmd, retval);
spin_unlock_bh(&oct->cmd_resp_wqlock);
return -1;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index e55400c91574..4b8da67b995f 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -138,7 +138,7 @@ octnet_prepare_pci_cmd_o2(struct octeon_device *oct,
/* assume that rflag is cleared so therefore front data will only have
* irh and ossp[0], ossp[1] for a total of 32 bytes
*/
- ih2->fsz = 24;
+ ih2->fsz = LIO_PCICMD_O2;
ih2->tagtype = ORDERED_TAG;
ih2->grp = DEFAULT_POW_GRP;
@@ -196,7 +196,7 @@ octnet_prepare_pci_cmd_o3(struct octeon_device *oct,
*/
ih3->pkind = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind;
/*PKI IH*/
- ih3->fsz = 24 + 8;
+ ih3->fsz = LIO_PCICMD_O3;
if (!setup->s.gather) {
ih3->dlengsz = setup->s.u.datasize;
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 3ee5d023c789..90866bb50033 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -30,6 +30,7 @@
#include "octeon_main.h"
#include "octeon_network.h"
#include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \
(octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
@@ -71,7 +72,8 @@ int octeon_init_instr_queue(struct octeon_device *oct,
if (OCTEON_CN6XXX(oct))
conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
-
+ else if (OCTEON_CN23XX_PF(oct))
+ conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf)));
if (!conf) {
dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
oct->chip_id);
@@ -88,6 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
q_size = (u32)conf->instr_type * num_descs;
iq = oct->instr_queue[iq_no];
+
iq->oct_dev = oct;
set_dev_node(&oct->pci_dev->dev, numa_node);
@@ -181,6 +184,9 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
if (OCTEON_CN6XXX(oct))
desc_size =
CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
+ else if (OCTEON_CN23XX_PF(oct))
+ desc_size =
+ CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf));
vfree(iq->request_list);
@@ -383,7 +389,12 @@ lio_process_iq_request_list(struct octeon_device *oct,
case REQTYPE_SOFT_COMMAND:
sc = buf;
- irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+ if (OCTEON_CN23XX_PF(oct))
+ irh = (struct octeon_instr_irh *)
+ &sc->cmd.cmd3.irh;
+ else
+ irh = (struct octeon_instr_irh *)
+ &sc->cmd.cmd2.irh;
if (irh->rflag) {
/* We're expecting a response from Octeon.
* It's up to lio_process_ordered_list() to
@@ -583,6 +594,8 @@ octeon_prepare_soft_command(struct octeon_device *oct,
{
struct octeon_config *oct_cfg;
struct octeon_instr_ih2 *ih2;
+ struct octeon_instr_ih3 *ih3;
+ struct octeon_instr_pki_ih3 *pki_ih3;
struct octeon_instr_irh *irh;
struct octeon_instr_rdp *rdp;
@@ -591,36 +604,88 @@ octeon_prepare_soft_command(struct octeon_device *oct,
oct_cfg = octeon_get_conf(oct);
- ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
- ih2->tagtype = ATOMIC_TAG;
- ih2->tag = LIO_CONTROL;
- ih2->raw = 1;
- ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
-
- if (sc->datasize) {
- ih2->dlengsz = sc->datasize;
- ih2->rs = 1;
- }
-
- irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
- irh->opcode = opcode;
- irh->subcode = subcode;
-
- /* opcode/subcode specific parameters (ossp) */
- irh->ossp = irh_ossp;
- sc->cmd.cmd2.ossp[0] = ossp0;
- sc->cmd.cmd2.ossp[1] = ossp1;
-
- if (sc->rdatasize) {
- rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
- rdp->pcie_port = oct->pcie_port;
- rdp->rlen = sc->rdatasize;
+ if (OCTEON_CN23XX_PF(oct)) {
+ ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+
+ ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
+
+ pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
+
+ pki_ih3->w = 1;
+ pki_ih3->raw = 1;
+ pki_ih3->utag = 1;
+ pki_ih3->uqpg =
+ oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
+ pki_ih3->utt = 1;
+ pki_ih3->tag = LIO_CONTROL;
+ pki_ih3->tagtype = ATOMIC_TAG;
+ pki_ih3->qpg =
+ oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
+ pki_ih3->pm = 0x7;
+ pki_ih3->sl = 8;
+
+ if (sc->datasize)
+ ih3->dlengsz = sc->datasize;
+
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+ irh->opcode = opcode;
+ irh->subcode = subcode;
+
+ /* opcode/subcode specific parameters (ossp) */
+ irh->ossp = irh_ossp;
+ sc->cmd.cmd3.ossp[0] = ossp0;
+ sc->cmd.cmd3.ossp[1] = ossp1;
+
+ if (sc->rdatasize) {
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+ rdp->pcie_port = oct->pcie_port;
+ rdp->rlen = sc->rdatasize;
+
+ irh->rflag = 1;
+ /*PKI IH3*/
+ /* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
+ ih3->fsz = LIO_SOFTCMDRESP_IH3;
+ } else {
+ irh->rflag = 0;
+ /*PKI IH3*/
+ /* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */
+ ih3->fsz = LIO_PCICMD_O3;
+ }
- irh->rflag = 1;
- ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
} else {
- irh->rflag = 0;
- ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ ih2->tagtype = ATOMIC_TAG;
+ ih2->tag = LIO_CONTROL;
+ ih2->raw = 1;
+ ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
+
+ if (sc->datasize) {
+ ih2->dlengsz = sc->datasize;
+ ih2->rs = 1;
+ }
+
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+ irh->opcode = opcode;
+ irh->subcode = subcode;
+
+ /* opcode/subcode specific parameters (ossp) */
+ irh->ossp = irh_ossp;
+ sc->cmd.cmd2.ossp[0] = ossp0;
+ sc->cmd.cmd2.ossp[1] = ossp1;
+
+ if (sc->rdatasize) {
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ rdp->pcie_port = oct->pcie_port;
+ rdp->rlen = sc->rdatasize;
+
+ irh->rflag = 1;
+ /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+ ih2->fsz = LIO_SOFTCMDRESP_IH2;
+ } else {
+ irh->rflag = 0;
+ /* irh + ossp[0] + ossp[1] = 24 bytes */
+ ih2->fsz = LIO_PCICMD_O2;
+ }
}
}
@@ -628,23 +693,39 @@ int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_soft_command *sc)
{
struct octeon_instr_ih2 *ih2;
+ struct octeon_instr_ih3 *ih3;
struct octeon_instr_irh *irh;
u32 len;
- ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
- if (ih2->dlengsz) {
- WARN_ON(!sc->dmadptr);
- sc->cmd.cmd2.dptr = sc->dmadptr;
- }
- irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
- if (irh->rflag) {
- WARN_ON(!sc->dmarptr);
- WARN_ON(!sc->status_word);
- *sc->status_word = COMPLETION_WORD_INIT;
-
- sc->cmd.cmd2.rptr = sc->dmarptr;
+ if (OCTEON_CN23XX_PF(oct)) {
+ ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+ if (ih3->dlengsz) {
+ WARN_ON(!sc->dmadptr);
+ sc->cmd.cmd3.dptr = sc->dmadptr;
+ }
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+ if (irh->rflag) {
+ WARN_ON(!sc->dmarptr);
+ WARN_ON(!sc->status_word);
+ *sc->status_word = COMPLETION_WORD_INIT;
+ sc->cmd.cmd3.rptr = sc->dmarptr;
+ }
+ len = (u32)ih3->dlengsz;
+ } else {
+ ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+ if (ih2->dlengsz) {
+ WARN_ON(!sc->dmadptr);
+ sc->cmd.cmd2.dptr = sc->dmadptr;
+ }
+ irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+ if (irh->rflag) {
+ WARN_ON(!sc->dmarptr);
+ WARN_ON(!sc->status_word);
+ *sc->status_word = COMPLETION_WORD_INIT;
+ sc->cmd.cmd2.rptr = sc->dmarptr;
+ }
+ len = (u32)ih2->dlengsz;
}
- len = (u32)ih2->dlengsz;
if (sc->wait_time)
sc->timeout = jiffies + sc->wait_time;
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 709049e36627..be52178d8cb6 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -91,8 +91,13 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
sc = (struct octeon_soft_command *)ordered_sc_list->
head.next;
- rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
- rptr = sc->cmd.cmd2.rptr;
+ if (OCTEON_CN23XX_PF(octeon_dev)) {
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+ rptr = sc->cmd.cmd3.rptr;
+ } else {
+ rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+ rptr = sc->cmd.cmd2.rptr;
+ }
status = OCTEON_REQUEST_PENDING;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 3f7b33aa5ec5..45955691edc7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -788,6 +788,11 @@ struct uld_msix_info {
char desc[IFNAMSIZ + 10];
};
+struct vf_info {
+ unsigned char vf_mac_addr[ETH_ALEN];
+ bool pf_set_mac;
+};
+
struct adapter {
void __iomem *regs;
void __iomem *bar2;
@@ -821,6 +826,9 @@ struct adapter {
struct net_device *port[MAX_NPORTS];
u8 chan_map[NCHAN]; /* channel -> port map */
+ struct vf_info *vfinfo;
+ u8 num_vfs;
+
u32 filter_mode;
unsigned int l2t_start;
unsigned int l2t_end;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 44019bdd526d..44cc9767936f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3094,10 +3094,44 @@ static int dummy_open(struct net_device *dev)
return 0;
}
+/* Fill MAC address that will be assigned by the FW */
+static void fill_vf_station_mac_addr(struct adapter *adap)
+{
+ unsigned int i;
+ u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
+ int err;
+ u8 *na;
+ u16 a, b;
+
+ err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
+ if (!err) {
+ na = adap->params.vpd.na;
+ for (i = 0; i < ETH_ALEN; i++)
+ hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
+ hex2val(na[2 * i + 1]));
+ a = (hw_addr[0] << 8) | hw_addr[1];
+ b = (hw_addr[1] << 8) | hw_addr[2];
+ a ^= b;
+ a |= 0x0200; /* locally assigned Ethernet MAC address */
+ a &= ~0x0100; /* not a multicast Ethernet MAC address */
+ macaddr[0] = a >> 8;
+ macaddr[1] = a & 0xff;
+
+ for (i = 2; i < 5; i++)
+ macaddr[i] = hw_addr[i + 1];
+
+ for (i = 0; i < adap->num_vfs; i++) {
+ macaddr[5] = adap->pf * 16 + i;
+ ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr);
+ }
+ }
+}
+
static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adap = pi->adapter;
+ int ret;
/* verify MAC addr is valid */
if (!is_valid_ether_addr(mac)) {
@@ -3109,7 +3143,23 @@ static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
dev_info(pi->adapter->pdev_dev,
"Setting MAC %pM on VF %d\n", mac, vf);
- return t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+ ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+ if (!ret)
+ ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
+ return ret;
+}
+
+static int cxgb_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+
+ if (vf >= adap->num_vfs)
+ return -EINVAL;
+ ivi->vf = vf;
+ ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+ return 0;
}
#endif
@@ -3259,6 +3309,7 @@ static const struct net_device_ops cxgb4_netdev_ops = {
static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
.ndo_open = dummy_open,
.ndo_set_vf_mac = cxgb_set_vf_mac,
+ .ndo_get_vf_config = cxgb_get_vf_config,
};
#endif
@@ -5116,6 +5167,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
unregister_netdev(adap->port[0]);
adap->port[0] = NULL;
}
+ /* free VF resources */
+ kfree(adap->vfinfo);
+ adap->vfinfo = NULL;
+ adap->num_vfs = 0;
return num_vfs;
}
@@ -5124,10 +5179,16 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
if (err)
return err;
+ adap->num_vfs = num_vfs;
err = config_mgmt_dev(pdev);
if (err)
return err;
}
+
+ adap->vfinfo = kcalloc(adap->num_vfs,
+ sizeof(struct vf_info), GFP_KERNEL);
+ if (adap->vfinfo)
+ fill_vf_station_mac_addr(adap);
return num_vfs;
}
#endif
@@ -5621,6 +5682,7 @@ static void remove_one(struct pci_dev *pdev)
if (adapter->port[0])
unregister_netdev(adapter->port[0]);
iounmap(adapter->regs);
+ kfree(adapter->vfinfo);
kfree(adapter);
pci_disable_sriov(pdev);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index de451ee2ba75..15be54324e3d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2729,7 +2729,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
out:
vfree(vpd);
- return ret;
+ return ret < 0 ? ret : 0;
}
/**
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 2dadfa961898..473977d6e9e8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -895,17 +895,18 @@ release_desc:
rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
ring->calc_idx = idx;
+
+ done++;
+ }
+
+ if (done) {
/* make sure that all changes to the dma ring are flushed before
* we continue
*/
wmb();
mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0);
- done++;
}
- if (done < budget)
- mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
-
return done;
}
@@ -1024,10 +1025,13 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget)
struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
u32 status, mask;
int rx_done = 0;
+ int remain_budget = budget;
mtk_handle_status_irq(eth);
+
+poll_again:
mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
- rx_done = mtk_poll_rx(napi, budget, eth);
+ rx_done = mtk_poll_rx(napi, remain_budget, eth);
if (unlikely(netif_msg_intr(eth))) {
status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
@@ -1036,18 +1040,18 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget)
"done rx %d, intr 0x%08x/0x%x\n",
rx_done, status, mask);
}
-
- if (rx_done == budget)
+ if (rx_done == remain_budget)
return budget;
status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
- if (status & MTK_RX_DONE_INT)
- return budget;
-
+ if (status & MTK_RX_DONE_INT) {
+ remain_budget -= rx_done;
+ goto poll_again;
+ }
napi_complete(napi);
mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
- return rx_done;
+ return rx_done + budget - remain_budget;
}
static int mtk_tx_alloc(struct mtk_eth *eth)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4198e9bf89d0..31a41add5b4c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2642,12 +2642,16 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
if (IS_ERR(prog))
return PTR_ERR(prog);
}
+ mutex_lock(&mdev->state_lock);
for (i = 0; i < priv->rx_ring_num; i++) {
- /* This xchg is paired with READ_ONCE in the fastpath */
- old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+ old_prog = rcu_dereference_protected(
+ priv->rx_ring[i]->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
+ rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
}
+ mutex_unlock(&mdev->state_lock);
return 0;
}
@@ -2680,7 +2684,10 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
priv->xdp_ring_num);
for (i = 0; i < priv->rx_ring_num; i++) {
- old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+ old_prog = rcu_dereference_protected(
+ priv->rx_ring[i]->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
+ rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 2040dad8611d..6758292311f4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -537,7 +537,9 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring = *pring;
struct bpf_prog *old_prog;
- old_prog = READ_ONCE(ring->xdp_prog);
+ old_prog = rcu_dereference_protected(
+ ring->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
if (old_prog)
bpf_prog_put(old_prog);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
@@ -800,7 +802,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (budget <= 0)
return polled;
- xdp_prog = READ_ONCE(ring->xdp_prog);
+ /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(ring->xdp_prog);
doorbell_pending = 0;
tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
@@ -858,15 +862,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Drop the packet, since HW loopback-ed it */
mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
bucket = &priv->mac_hash[mac_hash];
- rcu_read_lock();
hlist_for_each_entry_rcu(entry, bucket, hlist) {
if (ether_addr_equal_64bits(entry->mac,
- ethh->h_source)) {
- rcu_read_unlock();
+ ethh->h_source))
goto next;
- }
}
- rcu_read_unlock();
}
}
@@ -1077,6 +1077,7 @@ consumed:
}
out:
+ rcu_read_unlock();
if (doorbell_pending)
mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 2c2913dcae98..47867c49f91c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -340,7 +340,7 @@ struct mlx4_en_rx_ring {
u8 fcs_del;
void *buf;
void *rx_info;
- struct bpf_prog *xdp_prog;
+ struct bpf_prog __rcu *xdp_prog;
struct mlx4_en_page_cache page_cache;
unsigned long bytes;
unsigned long packets;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 92105916ef40..3740a4417fa0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -30,6 +30,7 @@
#include <linux/delay.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
+#include <linux/pm_runtime.h>
#include "stmmac_platform.h"
@@ -301,6 +302,118 @@ static const struct rk_gmac_ops rk3288_ops = {
.set_rmii_speed = rk3288_set_rmii_speed,
};
+#define RK3366_GRF_SOC_CON6 0x0418
+#define RK3366_GRF_SOC_CON7 0x041c
+
+/* RK3366_GRF_SOC_CON6 */
+#define RK3366_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \
+ GRF_CLR_BIT(11))
+#define RK3366_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+ GRF_BIT(11))
+#define RK3366_GMAC_FLOW_CTRL GRF_BIT(8)
+#define RK3366_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8)
+#define RK3366_GMAC_SPEED_10M GRF_CLR_BIT(7)
+#define RK3366_GMAC_SPEED_100M GRF_BIT(7)
+#define RK3366_GMAC_RMII_CLK_25M GRF_BIT(3)
+#define RK3366_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3)
+#define RK3366_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3366_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5))
+#define RK3366_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3366_GMAC_RMII_MODE GRF_BIT(6)
+#define RK3366_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6)
+
+/* RK3366_GRF_SOC_CON7 */
+#define RK3366_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7)
+#define RK3366_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7)
+#define RK3366_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15)
+#define RK3366_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15)
+#define RK3366_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3366_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv,
+ int tx_delay, int rx_delay)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_PHY_INTF_SEL_RGMII |
+ RK3366_GMAC_RMII_MODE_CLR);
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7,
+ RK3366_GMAC_RXCLK_DLY_ENABLE |
+ RK3366_GMAC_TXCLK_DLY_ENABLE |
+ RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) |
+ RK3366_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE);
+}
+
+static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ if (speed == 10)
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_CLK_2_5M);
+ else if (speed == 100)
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_CLK_25M);
+ else if (speed == 1000)
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_CLK_125M);
+ else
+ dev_err(dev, "unknown speed value for RGMII! speed=%d", speed);
+}
+
+static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ if (speed == 10) {
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_RMII_CLK_2_5M |
+ RK3366_GMAC_SPEED_10M);
+ } else if (speed == 100) {
+ regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+ RK3366_GMAC_RMII_CLK_25M |
+ RK3366_GMAC_SPEED_100M);
+ } else {
+ dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+ }
+}
+
+static const struct rk_gmac_ops rk3366_ops = {
+ .set_to_rgmii = rk3366_set_to_rgmii,
+ .set_to_rmii = rk3366_set_to_rmii,
+ .set_rgmii_speed = rk3366_set_rgmii_speed,
+ .set_rmii_speed = rk3366_set_rmii_speed,
+};
+
#define RK3368_GRF_SOC_CON15 0x043c
#define RK3368_GRF_SOC_CON16 0x0440
@@ -413,6 +526,118 @@ static const struct rk_gmac_ops rk3368_ops = {
.set_rmii_speed = rk3368_set_rmii_speed,
};
+#define RK3399_GRF_SOC_CON5 0xc214
+#define RK3399_GRF_SOC_CON6 0xc218
+
+/* RK3399_GRF_SOC_CON5 */
+#define RK3399_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \
+ GRF_CLR_BIT(11))
+#define RK3399_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+ GRF_BIT(11))
+#define RK3399_GMAC_FLOW_CTRL GRF_BIT(8)
+#define RK3399_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8)
+#define RK3399_GMAC_SPEED_10M GRF_CLR_BIT(7)
+#define RK3399_GMAC_SPEED_100M GRF_BIT(7)
+#define RK3399_GMAC_RMII_CLK_25M GRF_BIT(3)
+#define RK3399_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3)
+#define RK3399_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3399_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5))
+#define RK3399_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3399_GMAC_RMII_MODE GRF_BIT(6)
+#define RK3399_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6)
+
+/* RK3399_GRF_SOC_CON6 */
+#define RK3399_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7)
+#define RK3399_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7)
+#define RK3399_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15)
+#define RK3399_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15)
+#define RK3399_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3399_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv,
+ int tx_delay, int rx_delay)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_PHY_INTF_SEL_RGMII |
+ RK3399_GMAC_RMII_MODE_CLR);
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6,
+ RK3399_GMAC_RXCLK_DLY_ENABLE |
+ RK3399_GMAC_TXCLK_DLY_ENABLE |
+ RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) |
+ RK3399_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE);
+}
+
+static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ if (speed == 10)
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_CLK_2_5M);
+ else if (speed == 100)
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_CLK_25M);
+ else if (speed == 1000)
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_CLK_125M);
+ else
+ dev_err(dev, "unknown speed value for RGMII! speed=%d", speed);
+}
+
+static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ if (speed == 10) {
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_RMII_CLK_2_5M |
+ RK3399_GMAC_SPEED_10M);
+ } else if (speed == 100) {
+ regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+ RK3399_GMAC_RMII_CLK_25M |
+ RK3399_GMAC_SPEED_100M);
+ } else {
+ dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+ }
+}
+
+static const struct rk_gmac_ops rk3399_ops = {
+ .set_to_rgmii = rk3399_set_to_rgmii,
+ .set_to_rmii = rk3399_set_to_rmii,
+ .set_rgmii_speed = rk3399_set_rgmii_speed,
+ .set_rmii_speed = rk3399_set_rmii_speed,
+};
+
static int gmac_clk_init(struct rk_priv_data *bsp_priv)
{
struct device *dev = &bsp_priv->pdev->dev;
@@ -629,6 +854,16 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
"rockchip,grf");
bsp_priv->pdev = pdev;
+ gmac_clk_init(bsp_priv);
+
+ return bsp_priv;
+}
+
+static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
+{
+ int ret;
+ struct device *dev = &bsp_priv->pdev->dev;
+
/*rmii or rgmii*/
if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
dev_info(dev, "init for RGMII\n");
@@ -641,15 +876,6 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
dev_err(dev, "NO interface defined!\n");
}
- gmac_clk_init(bsp_priv);
-
- return bsp_priv;
-}
-
-static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
-{
- int ret;
-
ret = phy_power_on(bsp_priv, true);
if (ret)
return ret;
@@ -658,11 +884,19 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
if (ret)
return ret;
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
+
return 0;
}
static void rk_gmac_powerdown(struct rk_priv_data *gmac)
{
+ struct device *dev = &gmac->pdev->dev;
+
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+
phy_power_on(gmac, false);
gmac_clk_enable(gmac, false);
}
@@ -760,7 +994,9 @@ static int rk_gmac_probe(struct platform_device *pdev)
static const struct of_device_id rk_gmac_dwmac_match[] = {
{ .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
{ .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
+ { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops },
{ .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops },
+ { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops },
{ }
};
MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index d300d536d06f..fa0cfda24fd9 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -546,7 +546,8 @@ fatal_error:
static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- int queue, len;
+ int queue;
+ unsigned int len;
struct cpmac_desc *desc;
struct cpmac_priv *priv = netdev_priv(dev);
@@ -556,7 +557,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(skb_padto(skb, ETH_ZLEN)))
return NETDEV_TX_OK;
- len = max(skb->len, ETH_ZLEN);
+ len = max_t(unsigned int, skb->len, ETH_ZLEN);
queue = skb_get_queue_mapping(skb);
netif_stop_subqueue(dev, queue);
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index c5544d36c54f..e7b516342678 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -50,6 +50,8 @@
*
*****************************************************************************/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
@@ -108,23 +110,12 @@
/*****************************************************************************/
/* Debugging functions */
/*****************************************************************************/
-#define D__(lvl_, fmt, arg...) \
- do { \
- printk(lvl_ "[%d:%s]: " fmt "\n", \
- __LINE__, __func__, ## arg); \
- } while (0)
-
-#define D_(lvl, args...) \
- do { \
- if (lvl & debug) \
- D__(KERN_INFO, args); \
- } while (0)
-
-#define D1(args...) D_(0x01, ##args)
-#define D2(args...) D_(0x02, ##args)
-#define D3(args...) D_(0x04, ##args)
-#define D4(args...) D_(0x08, ##args)
-#define D5(args...) D_(0x10, ##args)
+#define hso_dbg(lvl, fmt, ...) \
+do { \
+ if ((lvl) & debug) \
+ pr_info("[%d:%s] " fmt, \
+ __LINE__, __func__, ##__VA_ARGS__); \
+} while (0)
/*****************************************************************************/
/* Enumerators */
@@ -649,7 +640,7 @@ static int get_free_serial_index(void)
}
spin_unlock_irqrestore(&serial_table_lock, flags);
- printk(KERN_ERR "%s: no free serial devices in table\n", __func__);
+ pr_err("%s: no free serial devices in table\n", __func__);
return -1;
}
@@ -709,7 +700,8 @@ static void handle_usb_error(int status, const char *function,
}
/* log a meaningful explanation of an USB status */
- D1("%s: received USB status - %s (%d)", function, explanation, status);
+ hso_dbg(0x1, "%s: received USB status - %s (%d)\n",
+ function, explanation, status);
}
/* Network interface functions */
@@ -808,7 +800,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb,
DUMP1(skb->data, skb->len);
/* Copy it from kernel memory to OUR memory */
memcpy(odev->mux_bulk_tx_buf, skb->data, skb->len);
- D1("len: %d/%d", skb->len, MUX_BULK_TX_BUF_SIZE);
+ hso_dbg(0x1, "len: %d/%d\n", skb->len, MUX_BULK_TX_BUF_SIZE);
/* Fill in the URB for shipping it out. */
usb_fill_bulk_urb(odev->mux_bulk_tx_urb,
@@ -872,7 +864,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
unsigned char *tmp_rx_buf;
/* log if needed */
- D1("Rx %d bytes", count);
+ hso_dbg(0x1, "Rx %d bytes\n", count);
DUMP(ip_pkt, min(128, (int)count));
while (count) {
@@ -912,7 +904,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
frame_len);
if (!odev->skb_rx_buf) {
/* We got no receive buffer. */
- D1("could not allocate memory");
+ hso_dbg(0x1, "could not allocate memory\n");
odev->rx_parse_state = WAIT_SYNC;
continue;
}
@@ -972,11 +964,11 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
break;
case WAIT_SYNC:
- D1(" W_S");
+ hso_dbg(0x1, " W_S\n");
count = 0;
break;
default:
- D1(" ");
+ hso_dbg(0x1, "\n");
count--;
break;
}
@@ -1020,7 +1012,7 @@ static void read_bulk_callback(struct urb *urb)
/* Sanity check */
if (!odev || !test_bit(HSO_NET_RUNNING, &odev->flags)) {
- D1("BULK IN callback but driver is not active!");
+ hso_dbg(0x1, "BULK IN callback but driver is not active!\n");
return;
}
usb_mark_last_busy(urb->dev);
@@ -1112,11 +1104,11 @@ static void _hso_serial_set_termios(struct tty_struct *tty,
struct hso_serial *serial = tty->driver_data;
if (!serial) {
- printk(KERN_ERR "%s: no tty structures", __func__);
+ pr_err("%s: no tty structures", __func__);
return;
}
- D4("port %d", serial->minor);
+ hso_dbg(0x8, "port %d\n", serial->minor);
/*
* Fix up unsupported bits
@@ -1205,11 +1197,11 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb)
struct hso_serial *serial = urb->context;
int status = urb->status;
- D4("\n--- Got serial_read_bulk callback %02x ---", status);
+ hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status);
/* sanity check */
if (!serial) {
- D1("serial == NULL");
+ hso_dbg(0x1, "serial == NULL\n");
return;
}
if (status) {
@@ -1217,7 +1209,7 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb)
return;
}
- D1("Actual length = %d\n", urb->actual_length);
+ hso_dbg(0x1, "Actual length = %d\n", urb->actual_length);
DUMP1(urb->transfer_buffer, urb->actual_length);
/* Anyone listening? */
@@ -1266,7 +1258,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) {
WARN_ON(1);
tty->driver_data = NULL;
- D1("Failed to open port");
+ hso_dbg(0x1, "Failed to open port\n");
return -ENODEV;
}
@@ -1275,7 +1267,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
if (result < 0)
goto err_out;
- D1("Opening %d", serial->minor);
+ hso_dbg(0x1, "Opening %d\n", serial->minor);
/* setup */
tty->driver_data = serial;
@@ -1298,7 +1290,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
kref_get(&serial->parent->ref);
}
} else {
- D1("Port was already open");
+ hso_dbg(0x1, "Port was already open\n");
}
usb_autopm_put_interface(serial->parent->interface);
@@ -1317,7 +1309,7 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp)
struct hso_serial *serial = tty->driver_data;
u8 usb_gone;
- D1("Closing serial port");
+ hso_dbg(0x1, "Closing serial port\n");
/* Open failed, no close cleanup required */
if (serial == NULL)
@@ -1357,7 +1349,7 @@ static int hso_serial_write(struct tty_struct *tty, const unsigned char *buf,
/* sanity check */
if (serial == NULL) {
- printk(KERN_ERR "%s: serial is NULL\n", __func__);
+ pr_err("%s: serial is NULL\n", __func__);
return -ENODEV;
}
@@ -1412,8 +1404,8 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old)
unsigned long flags;
if (old)
- D5("Termios called with: cflags new[%d] - old[%d]",
- tty->termios.c_cflag, old->c_cflag);
+ hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n",
+ tty->termios.c_cflag, old->c_cflag);
/* the actual setup */
spin_lock_irqsave(&serial->serial_lock, flags);
@@ -1649,7 +1641,7 @@ static int hso_serial_tiocmget(struct tty_struct *tty)
/* sanity check */
if (!serial) {
- D1("no tty structures");
+ hso_dbg(0x1, "no tty structures\n");
return -EINVAL;
}
spin_lock_irq(&serial->serial_lock);
@@ -1682,7 +1674,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty,
/* sanity check */
if (!serial) {
- D1("no tty structures");
+ hso_dbg(0x1, "no tty structures\n");
return -EINVAL;
}
@@ -1721,7 +1713,7 @@ static int hso_serial_ioctl(struct tty_struct *tty,
{
struct hso_serial *serial = tty->driver_data;
int ret = 0;
- D4("IOCTL cmd: %d, arg: %ld", cmd, arg);
+ hso_dbg(0x8, "IOCTL cmd: %d, arg: %ld\n", cmd, arg);
if (!serial)
return -ENODEV;
@@ -1783,7 +1775,7 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port,
/* Sanity check */
if (!serial || !ctrl_urb || !ctrl_req) {
- printk(KERN_ERR "%s: Wrong arguments\n", __func__);
+ pr_err("%s: Wrong arguments\n", __func__);
return -EINVAL;
}
@@ -1808,9 +1800,9 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port,
pipe = usb_sndctrlpipe(serial->parent->usb, 0);
}
/* syslog */
- D2("%s command (%02x) len: %d, port: %d",
- type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write",
- ctrl_req->bRequestType, ctrl_req->wLength, port);
+ hso_dbg(0x2, "%s command (%02x) len: %d, port: %d\n",
+ type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write",
+ ctrl_req->bRequestType, ctrl_req->wLength, port);
/* Load ctrl urb */
ctrl_urb->transfer_flags = 0;
@@ -1876,11 +1868,11 @@ static void intr_callback(struct urb *urb)
handle_usb_error(status, __func__, NULL);
return;
}
- D4("\n--- Got intr callback 0x%02X ---", status);
+ hso_dbg(0x8, "--- Got intr callback 0x%02X ---\n", status);
/* what request? */
port_req = urb->transfer_buffer;
- D4(" port_req = 0x%.2X\n", *port_req);
+ hso_dbg(0x8, "port_req = 0x%.2X\n", *port_req);
/* loop over all muxed ports to find the one sending this */
for (i = 0; i < 8; i++) {
/* max 8 channels on MUX */
@@ -1888,7 +1880,8 @@ static void intr_callback(struct urb *urb)
serial = get_serial_by_shared_int_and_type(shared_int,
(1 << i));
if (serial != NULL) {
- D1("Pending read interrupt on port %d\n", i);
+ hso_dbg(0x1, "Pending read interrupt on port %d\n",
+ i);
spin_lock(&serial->serial_lock);
if (serial->rx_state == RX_IDLE &&
serial->port.count > 0) {
@@ -1900,8 +1893,8 @@ static void intr_callback(struct urb *urb)
} else
serial->rx_state = RX_PENDING;
} else {
- D1("Already a read pending on "
- "port %d or port not open\n", i);
+ hso_dbg(0x1, "Already a read pending on port %d or port not open\n",
+ i);
}
spin_unlock(&serial->serial_lock);
}
@@ -1933,7 +1926,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
/* sanity check */
if (!serial) {
- D1("serial == NULL");
+ hso_dbg(0x1, "serial == NULL\n");
return;
}
@@ -1948,7 +1941,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
tty_port_tty_wakeup(&serial->port);
hso_kick_transmit(serial);
- D1(" ");
+ hso_dbg(0x1, "\n");
}
/* called for writing diag or CS serial port */
@@ -1996,8 +1989,8 @@ static void ctrl_callback(struct urb *urb)
/* what request? */
req = (struct usb_ctrlrequest *)(urb->setup_packet);
- D4("\n--- Got muxed ctrl callback 0x%02X ---", status);
- D4("Actual length of urb = %d\n", urb->actual_length);
+ hso_dbg(0x8, "--- Got muxed ctrl callback 0x%02X ---\n", status);
+ hso_dbg(0x8, "Actual length of urb = %d\n", urb->actual_length);
DUMP1(urb->transfer_buffer, urb->actual_length);
if (req->bRequestType ==
@@ -2023,7 +2016,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
/* Sanity check */
if (urb == NULL || serial == NULL) {
- D1("serial = NULL");
+ hso_dbg(0x1, "serial = NULL\n");
return -2;
}
@@ -2035,7 +2028,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
}
/* Push data to tty */
- D1("data to push to tty");
+ hso_dbg(0x1, "data to push to tty\n");
count = tty_buffer_request_room(&serial->port, urb->actual_length);
if (count >= urb->actual_length) {
tty_insert_flip_string(&serial->port, urb->transfer_buffer,
@@ -2415,7 +2408,7 @@ static void hso_net_init(struct net_device *net)
{
struct hso_net *hso_net = netdev_priv(net);
- D1("sizeof hso_net is %d", (int)sizeof(*hso_net));
+ hso_dbg(0x1, "sizeof hso_net is %zu\n", sizeof(*hso_net));
/* fill in the other fields */
net->netdev_ops = &hso_netdev_ops;
@@ -3229,7 +3222,7 @@ static int __init hso_init(void)
int result;
/* put it in the log */
- printk(KERN_INFO "hso: %s\n", version);
+ pr_info("%s\n", version);
/* Initialise the serial table semaphore and table */
spin_lock_init(&serial_table_lock);
@@ -3260,16 +3253,15 @@ static int __init hso_init(void)
/* register the tty driver */
result = tty_register_driver(tty_drv);
if (result) {
- printk(KERN_ERR "%s - tty_register_driver failed(%d)\n",
- __func__, result);
+ pr_err("%s - tty_register_driver failed(%d)\n",
+ __func__, result);
goto err_free_tty;
}
/* register this module as an usb driver */
result = usb_register(&hso_driver);
if (result) {
- printk(KERN_ERR "Could not register hso driver? error: %d\n",
- result);
+ pr_err("Could not register hso driver - error: %d\n", result);
goto err_unreg_tty;
}
@@ -3284,7 +3276,7 @@ err_free_tty:
static void __exit hso_exit(void)
{
- printk(KERN_INFO "hso: unloaded\n");
+ pr_info("unloaded\n");
tty_unregister_driver(tty_drv);
put_tty_driver(tty_drv);
@@ -3301,7 +3293,7 @@ MODULE_DESCRIPTION(MOD_DESCRIPTION);
MODULE_LICENSE(MOD_LICENSE);
/* change the debug level (eg: insmod hso.ko debug=0x04) */
-MODULE_PARM_DESC(debug, "Level of debug [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
+MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
module_param(debug, int, S_IRUGO | S_IWUSR);
/* set the major tty number (eg: insmod hso.ko tty_major=245) */
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index dc989a8b5afb..831aa33d078a 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -33,7 +33,7 @@
#include "smsc95xx.h"
#define SMSC_CHIPNAME "smsc95xx"
-#define SMSC_DRIVER_VERSION "1.0.4"
+#define SMSC_DRIVER_VERSION "1.0.5"
#define HS_USB_PKT_SIZE (512)
#define FS_USB_PKT_SIZE (64)
#define DEFAULT_HS_BURST_CAP_SIZE (16 * 1024 + 5 * HS_USB_PKT_SIZE)
@@ -64,6 +64,7 @@
#define CARRIER_CHECK_DELAY (2 * HZ)
struct smsc95xx_priv {
+ u32 chip_id;
u32 mac_cr;
u32 hash_hi;
u32 hash_lo;
@@ -71,6 +72,7 @@ struct smsc95xx_priv {
spinlock_t mac_cr_lock;
u8 features;
u8 suspend_flags;
+ u8 mdix_ctrl;
bool link_ok;
struct delayed_work carrier_check;
struct usbnet *dev;
@@ -782,14 +784,113 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net,
return ret;
}
+static int get_mdix_status(struct net_device *net)
+{
+ struct usbnet *dev = netdev_priv(net);
+ u32 val;
+ int buf;
+
+ buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS);
+ if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) {
+ if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_)
+ return ETH_TP_MDI_AUTO;
+ else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_)
+ return ETH_TP_MDI_X;
+ } else {
+ buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val);
+ if (val & STRAP_STATUS_AMDIX_EN_)
+ return ETH_TP_MDI_AUTO;
+ }
+
+ return ETH_TP_MDI;
+}
+
+static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl)
+{
+ struct usbnet *dev = netdev_priv(net);
+ struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ int buf;
+
+ if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) ||
+ (pdata->chip_id == ID_REV_CHIP_ID_9530_) ||
+ (pdata->chip_id == ID_REV_CHIP_ID_89530_) ||
+ (pdata->chip_id == ID_REV_CHIP_ID_9730_)) {
+ /* Extend Manual AutoMDIX timer for 9500A/9500Ai */
+ buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+ PHY_EDPD_CONFIG);
+ buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_;
+ smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+ PHY_EDPD_CONFIG, buf);
+ }
+
+ if (mdix_ctrl == ETH_TP_MDI) {
+ buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+ SPECIAL_CTRL_STS);
+ buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+ buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+ SPECIAL_CTRL_STS_AMDIX_STATE_);
+ smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+ SPECIAL_CTRL_STS, buf);
+ } else if (mdix_ctrl == ETH_TP_MDI_X) {
+ buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+ SPECIAL_CTRL_STS);
+ buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+ buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+ SPECIAL_CTRL_STS_AMDIX_STATE_);
+ buf |= SPECIAL_CTRL_STS_AMDIX_STATE_;
+ smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+ SPECIAL_CTRL_STS, buf);
+ } else if (mdix_ctrl == ETH_TP_MDI_AUTO) {
+ buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+ SPECIAL_CTRL_STS);
+ buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+ buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+ SPECIAL_CTRL_STS_AMDIX_STATE_);
+ buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_;
+ smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+ SPECIAL_CTRL_STS, buf);
+ }
+ pdata->mdix_ctrl = mdix_ctrl;
+}
+
+static int smsc95xx_get_settings(struct net_device *net,
+ struct ethtool_cmd *cmd)
+{
+ struct usbnet *dev = netdev_priv(net);
+ struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ int retval;
+
+ retval = usbnet_get_settings(net, cmd);
+
+ cmd->eth_tp_mdix = pdata->mdix_ctrl;
+ cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl;
+
+ return retval;
+}
+
+static int smsc95xx_set_settings(struct net_device *net,
+ struct ethtool_cmd *cmd)
+{
+ struct usbnet *dev = netdev_priv(net);
+ struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ int retval;
+
+ if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl)
+ set_mdix_status(net, cmd->eth_tp_mdix_ctrl);
+
+ retval = usbnet_set_settings(net, cmd);
+
+ return retval;
+}
+
static const struct ethtool_ops smsc95xx_ethtool_ops = {
.get_link = usbnet_get_link,
.nway_reset = usbnet_nway_reset,
.get_drvinfo = usbnet_get_drvinfo,
.get_msglevel = usbnet_get_msglevel,
.set_msglevel = usbnet_set_msglevel,
- .get_settings = usbnet_get_settings,
- .set_settings = usbnet_set_settings,
+ .get_settings = smsc95xx_get_settings,
+ .set_settings = smsc95xx_set_settings,
.get_eeprom_len = smsc95xx_ethtool_get_eeprom_len,
.get_eeprom = smsc95xx_ethtool_get_eeprom,
.set_eeprom = smsc95xx_ethtool_set_eeprom,
@@ -1194,6 +1295,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
if (ret < 0)
return ret;
val >>= 16;
+ pdata->chip_id = val;
+ pdata->mdix_ctrl = get_mdix_status(dev->net);
if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) ||
(val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_))
diff --git a/drivers/net/usb/smsc95xx.h b/drivers/net/usb/smsc95xx.h
index 526faa0c44e6..29a4d9efce7c 100644
--- a/drivers/net/usb/smsc95xx.h
+++ b/drivers/net/usb/smsc95xx.h
@@ -144,6 +144,14 @@
#define BURST_CAP (0x38)
+#define STRAP_STATUS (0x3C)
+#define STRAP_STATUS_PWR_SEL_ (0x00000020)
+#define STRAP_STATUS_AMDIX_EN_ (0x00000010)
+#define STRAP_STATUS_PORT_SWAP_ (0x00000008)
+#define STRAP_STATUS_EEP_SIZE_ (0x00000004)
+#define STRAP_STATUS_RMT_WKP_ (0x00000002)
+#define STRAP_STATUS_EEP_DISABLE_ (0x00000001)
+
#define GPIO_WAKE (0x64)
#define INT_EP_CTL (0x68)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f605a3684a7f..199dec033cf8 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -287,7 +287,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
nla_put_s32(skb, NDA_LINK_NETNSID,
- peernet2id_alloc(dev_net(vxlan->dev), vxlan->net)))
+ peernet2id(dev_net(vxlan->dev), vxlan->net)))
goto nla_put_failure;
if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
@@ -2103,6 +2103,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
vni, md, flags, udp_sum);
if (err < 0) {
dst_release(ndst);
+ dev->stats.tx_errors++;
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 8b72ee710f95..fd82584acd48 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht,
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
const void *key,
struct rhash_head *obj,
- struct bucket_table *old_tbl);
+ struct bucket_table *old_tbl,
+ void **data);
int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
void rhashtable_walk_enter(struct rhashtable *ht,
@@ -563,8 +564,11 @@ restart:
return NULL;
}
-/* Internal function, please use rhashtable_insert_fast() instead */
-static inline int __rhashtable_insert_fast(
+/* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in hashes in there is a clash,
+ * otherwise it returns an error via ERR_PTR().
+ */
+static inline void *__rhashtable_insert_fast(
struct rhashtable *ht, const void *key, struct rhash_head *obj,
const struct rhashtable_params params)
{
@@ -577,6 +581,7 @@ static inline int __rhashtable_insert_fast(
spinlock_t *lock;
unsigned int elasticity;
unsigned int hash;
+ void *data = NULL;
int err;
restart:
@@ -601,11 +606,14 @@ restart:
new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
if (unlikely(new_tbl)) {
- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
if (!IS_ERR_OR_NULL(tbl))
goto slow_path;
err = PTR_ERR(tbl);
+ if (err == -EEXIST)
+ err = 0;
+
goto out;
}
@@ -619,25 +627,25 @@ slow_path:
err = rhashtable_insert_rehash(ht, tbl);
rcu_read_unlock();
if (err)
- return err;
+ return ERR_PTR(err);
goto restart;
}
- err = -EEXIST;
+ err = 0;
elasticity = ht->elasticity;
rht_for_each(head, tbl, hash) {
if (key &&
unlikely(!(params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, head)) :
- rhashtable_compare(&arg, rht_obj(ht, head)))))
+ rhashtable_compare(&arg, rht_obj(ht, head))))) {
+ data = rht_obj(ht, head);
goto out;
+ }
if (!--elasticity)
goto slow_path;
}
- err = 0;
-
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
RCU_INIT_POINTER(obj->next, head);
@@ -652,7 +660,7 @@ out:
spin_unlock_bh(lock);
rcu_read_unlock();
- return err;
+ return err ? ERR_PTR(err) : data;
}
/**
@@ -675,7 +683,13 @@ static inline int rhashtable_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
const struct rhashtable_params params)
{
- return __rhashtable_insert_fast(ht, NULL, obj, params);
+ void *ret;
+
+ ret = __rhashtable_insert_fast(ht, NULL, obj, params);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
}
/**
@@ -704,11 +718,15 @@ static inline int rhashtable_lookup_insert_fast(
const struct rhashtable_params params)
{
const char *key = rht_obj(ht, obj);
+ void *ret;
BUG_ON(ht->p.obj_hashfn);
- return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
- params);
+ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
}
/**
@@ -737,6 +755,32 @@ static inline int rhashtable_lookup_insert_key(
struct rhashtable *ht, const void *key, struct rhash_head *obj,
const struct rhashtable_params params)
{
+ void *ret;
+
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+ ret = __rhashtable_insert_fast(ht, key, obj, params);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ * @data: pointer to element data already in hashes
+ *
+ * Just like rhashtable_lookup_insert_key(), but this function returns the
+ * object if it exists, NULL if it does not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
BUG_ON(!ht->p.obj_hashfn || !key);
return __rhashtable_insert_fast(ht, key, obj, params);
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 445b019c2078..50418052a520 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -42,7 +42,6 @@ union nf_conntrack_expect_proto {
#include <linux/types.h>
#include <linux/skbuff.h>
-#include <linux/timer.h>
#ifdef CONFIG_NETFILTER_DEBUG
#define NF_CT_ASSERT(x) WARN_ON(!(x))
@@ -73,7 +72,7 @@ struct nf_conn_help {
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
struct nf_conn {
- /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
+ /* Usage count in here is 1 for hash table, 1 per skb,
* plus 1 for any connection(s) we are `master' for
*
* Hint, SKB address this struct and refcnt via skb->nfct and
@@ -96,8 +95,8 @@ struct nf_conn {
/* Have we seen traffic both ways yet? (bitset) */
unsigned long status;
- /* Timer function; drops refcnt when it goes off. */
- struct timer_list timeout;
+ /* jiffies32 when this ct is considered dead */
+ u32 timeout;
possible_net_t ct_net;
@@ -220,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
}
-bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb, int do_acct);
-
/* kill conntrack and do accounting */
-static inline bool nf_ct_kill_acct(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb)
-{
- return __nf_ct_kill_acct(ct, ctinfo, skb, 1);
-}
+bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb);
/* kill conntrack without accounting */
static inline bool nf_ct_kill(struct nf_conn *ct)
{
- return __nf_ct_kill_acct(ct, 0, NULL, 0);
+ return nf_ct_delete(ct, 0, 0);
}
/* These are for NAT. Icky. */
@@ -291,21 +283,55 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
}
+#define nfct_time_stamp ((u32)(jiffies))
+
/* jiffies until ct expires, 0 if already expired */
static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{
- long timeout = (long)ct->timeout.expires - (long)jiffies;
+ s32 timeout = ct->timeout - nfct_time_stamp;
return timeout > 0 ? timeout : 0;
}
+static inline bool nf_ct_is_expired(const struct nf_conn *ct)
+{
+ return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
+}
+
+/* use after obtaining a reference count */
+static inline bool nf_ct_should_gc(const struct nf_conn *ct)
+{
+ return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
+ !nf_ct_is_dying(ct);
+}
+
struct kernel_param;
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
int nf_conntrack_hash_resize(unsigned int hashsize);
+
+extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
+extern seqcount_t nf_conntrack_generation;
extern unsigned int nf_conntrack_max;
+/* must be called with rcu read lock held */
+static inline void
+nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+ struct hlist_nulls_head *hptr;
+ unsigned int sequence, hsz;
+
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hsz = nf_conntrack_htable_size;
+ hptr = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ *hash = hptr;
+ *hsize = hsz;
+}
+
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
gfp_t flags);
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 79d7ac5c9740..62e17d1319ff 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
-
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net,
@@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
#define CONNTRACK_LOCKS 1024
-extern struct hlist_nulls_head *nf_conntrack_hash;
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
void nf_conntrack_lock(spinlock_t *lock);
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index fa36447371c6..12d967b58726 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -12,12 +12,19 @@
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>
+enum nf_ct_ecache_state {
+ NFCT_ECACHE_UNKNOWN, /* destroy event not sent */
+ NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */
+ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */
+};
+
struct nf_conntrack_ecache {
- unsigned long cache; /* bitops want long */
- unsigned long missed; /* missed events */
- u16 ctmask; /* bitmask of ct events to be delivered */
- u16 expmask; /* bitmask of expect events to be delivered */
- u32 portid; /* netlink portid of destroyer */
+ unsigned long cache; /* bitops want long */
+ unsigned long missed; /* missed events */
+ u16 ctmask; /* bitmask of ct events to be delivered */
+ u16 expmask; /* bitmask of expect events to be delivered */
+ u32 portid; /* netlink portid of destroyer */
+ enum nf_ct_ecache_state state; /* ecache state */
};
static inline struct nf_conntrack_ecache *
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 1a5fb36f165f..de629f1520df 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto);
-static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn)
-{
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
-#endif
-}
-
/* Generic netlink helpers */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 83d855ba6af1..ee07dc8b0a7b 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -60,8 +60,7 @@ struct nf_logger {
int nf_log_register(u_int8_t pf, struct nf_logger *logger);
void nf_log_unregister(struct nf_logger *logger);
-void nf_log_set(struct net *net, u_int8_t pf,
- const struct nf_logger *logger);
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger);
void nf_log_unset(struct net *net, const struct nf_logger *logger);
int nf_log_bind_pf(struct net *net, u_int8_t pf,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f2f13399ce44..8972468bc94b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -251,7 +251,8 @@ struct nft_set_ops {
int (*insert)(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem);
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext);
void (*activate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 38b1a80517f0..e469e85de3f9 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -15,10 +15,6 @@ struct nf_proto_net {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_header;
struct ctl_table *ctl_table;
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- struct ctl_table_header *ctl_compat_header;
- struct ctl_table *ctl_compat_table;
-#endif
#endif
unsigned int users;
};
@@ -58,10 +54,6 @@ struct nf_ip_net {
struct nf_udp_net udp;
struct nf_icmp_net icmp;
struct nf_icmp_net icmpv6;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- struct ctl_table_header *ctl_table_header;
- struct ctl_table *ctl_table;
-#endif
};
struct ct_pcpu {
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c674ba2563b7..28ce01d79707 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -724,6 +724,26 @@ enum nft_meta_keys {
};
/**
+ * enum nft_hash_attributes - nf_tables hash expression netlink attributes
+ *
+ * @NFTA_HASH_SREG: source register (NLA_U32)
+ * @NFTA_HASH_DREG: destination register (NLA_U32)
+ * @NFTA_HASH_LEN: source data length (NLA_U32)
+ * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
+ * @NFTA_HASH_SEED: seed value (NLA_U32)
+ */
+enum nft_hash_attributes {
+ NFTA_HASH_UNSPEC,
+ NFTA_HASH_SREG,
+ NFTA_HASH_DREG,
+ NFTA_HASH_LEN,
+ NFTA_HASH_MODULUS,
+ NFTA_HASH_SEED,
+ __NFTA_HASH_MAX,
+};
+#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1)
+
+/**
* enum nft_meta_attributes - nf_tables meta expression netlink attributes
*
* @NFTA_META_DREG: destination register (NLA_U32)
@@ -880,6 +900,25 @@ enum nft_queue_attributes {
#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */
#define NFT_QUEUE_FLAG_MASK 0x03
+enum nft_quota_flags {
+ NFT_QUOTA_F_INV = (1 << 0),
+};
+
+/**
+ * enum nft_quota_attributes - nf_tables quota expression netlink attributes
+ *
+ * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16)
+ * @NFTA_QUOTA_FLAGS: flags (NLA_U32)
+ */
+enum nft_quota_attributes {
+ NFTA_QUOTA_UNSPEC,
+ NFTA_QUOTA_BYTES,
+ NFTA_QUOTA_FLAGS,
+ NFTA_QUOTA_PAD,
+ __NFTA_QUOTA_MAX
+};
+#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1)
+
/**
* enum nft_reject_types - nf_tables reject expression reject types
*
@@ -1051,7 +1090,7 @@ enum nft_gen_attributes {
* @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
* @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
*/
-enum nft_trace_attibutes {
+enum nft_trace_attributes {
NFTA_TRACE_UNSPEC,
NFTA_TRACE_TABLE,
NFTA_TRACE_CHAIN,
@@ -1082,4 +1121,28 @@ enum nft_trace_types {
__NFT_TRACETYPE_MAX
};
#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
+
+/**
+ * enum nft_ng_attributes - nf_tables number generator expression netlink attributes
+ *
+ * @NFTA_NG_DREG: destination register (NLA_U32)
+ * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32)
+ * @NFTA_NG_TYPE: operation type (NLA_U32)
+ */
+enum nft_ng_attributes {
+ NFTA_NG_UNSPEC,
+ NFTA_NG_DREG,
+ NFTA_NG_UNTIL,
+ NFTA_NG_TYPE,
+ __NFTA_NG_MAX
+};
+#define NFTA_NG_MAX (__NFTA_NG_MAX - 1)
+
+enum nft_ng_types {
+ NFT_NG_INCREMENTAL,
+ NFT_NG_RANDOM,
+ __NFT_NG_MAX
+};
+#define NFT_NG_MAX (__NFT_NG_MAX - 1)
+
#endif /* _LINUX_NF_TABLES_H */
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 4320b92ef696..06c28728bb53 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -444,7 +444,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
const void *key,
struct rhash_head *obj,
- struct bucket_table *tbl)
+ struct bucket_table *tbl,
+ void **data)
{
struct rhash_head *head;
unsigned int hash;
@@ -455,8 +456,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
err = -EEXIST;
- if (key && rhashtable_lookup_fast(ht, key, ht->p))
- goto exit;
+ if (key) {
+ *data = rhashtable_lookup_fast(ht, key, ht->p);
+ if (*data)
+ goto exit;
+ }
err = -E2BIG;
if (unlikely(rht_grow_above_max(ht, tbl)))
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index 5d9953a90929..1663df598545 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = {
static int __net_init nf_log_bridge_net_init(struct net *net)
{
- nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
- return 0;
+ return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
}
static void __net_exit nf_log_bridge_net_exit(struct net *net)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7a77dcabd4e8..42bdda0e616b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -215,31 +215,29 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
*/
int peernet2id_alloc(struct net *net, struct net *peer)
{
- unsigned long flags;
bool alloc;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
}
-EXPORT_SYMBOL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
- unsigned long flags;
int id;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
id = __peernet2id(net, peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
return id;
}
+EXPORT_SYMBOL(peernet2id);
/* This function returns true is the peer netns has an id assigned into the
* current netns.
@@ -251,18 +249,17 @@ bool peernet_has_id(struct net *net, struct net *peer)
struct net *get_net_ns_by_id(struct net *net, int id)
{
- unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
rcu_read_unlock();
return peer;
@@ -406,17 +403,17 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id;
- spin_lock_irq(&tmp->nsid_lock);
+ spin_lock_bh(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_irq(&tmp->nsid_lock);
+ spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
- spin_lock_irq(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_irq(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
}
rtnl_unlock();
@@ -544,7 +541,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
- unsigned long flags;
struct net *peer;
int nsid, err;
@@ -565,15 +561,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
if (IS_ERR(peer))
return PTR_ERR(peer);
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
@@ -695,11 +691,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.idx = 0,
.s_idx = cb->args[0],
};
- unsigned long flags;
- spin_lock_irqsave(&net->nsid_lock, flags);
+ spin_lock_bh(&net->nsid_lock);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_irqrestore(&net->nsid_lock, flags);
+ spin_unlock_bh(&net->nsid_lock);
cb->args[0] = net_cb.idx;
return skb->len;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c187c60e3e0c..d613309e3e5d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -25,17 +25,6 @@ config NF_CONNTRACK_IPV4
To compile it as a module, choose M here. If unsure, say N.
-config NF_CONNTRACK_PROC_COMPAT
- bool "proc/sysctl compatibility with old connection tracking"
- depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
- default y
- help
- This option enables /proc and sysctl compatibility with the old
- layer 3 dependent connection tracking. This is needed to keep
- old programs that have not been adapted to the new names working.
-
- If unsure, say Y.
-
if NF_TABLES
config NF_TABLES_IPV4
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 87b073da14c9..853328f8fd05 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -4,11 +4,6 @@
# objects for l3 independent conntrack
nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
-ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
-ifeq ($(CONFIG_PROC_FS),y)
-nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
-endif
-endif
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ae1a71a97132..870aebda2932 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -202,47 +202,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
},
};
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table ip_ct_sysctl_table[] = {
- {
- .procname = "ip_conntrack_max",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_count",
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_buckets",
- .maxlen = sizeof(unsigned int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_checksum",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_log_invalid",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &log_invalid_proto_min,
- .extra2 = &log_invalid_proto_max,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
-
/* Fast function for those who don't want to parse /proc (and I don't
blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
@@ -350,20 +309,6 @@ static struct nf_sockopt_ops so_getorigdst = {
static int ipv4_init_net(struct net *net)
{
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- struct nf_ip_net *in = &net->ct.nf_ct_proto;
- in->ctl_table = kmemdup(ip_ct_sysctl_table,
- sizeof(ip_ct_sysctl_table),
- GFP_KERNEL);
- if (!in->ctl_table)
- return -ENOMEM;
-
- in->ctl_table[0].data = &nf_conntrack_max;
- in->ctl_table[1].data = &net->ct.count;
- in->ctl_table[2].data = &nf_conntrack_htable_size;
- in->ctl_table[3].data = &net->ct.sysctl_checksum;
- in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
-#endif
return 0;
}
@@ -380,9 +325,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
#endif
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- .ctl_table_path = "net/ipv4/netfilter",
-#endif
.init_net = ipv4_init_net,
.me = THIS_MODULE,
};
@@ -492,16 +434,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
goto cleanup_icmpv4;
}
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- ret = nf_conntrack_ipv4_compat_init();
- if (ret < 0)
- goto cleanup_proto;
-#endif
return ret;
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- cleanup_proto:
- nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-#endif
cleanup_icmpv4:
nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
cleanup_udp4:
@@ -520,9 +453,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
synchronize_net();
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- nf_conntrack_ipv4_compat_fini();
-#endif
nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
deleted file mode 100644
index 63923710f325..000000000000
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_conntrack_acct.h>
-#include <linux/rculist_nulls.h>
-#include <linux/export.h>
-
-struct ct_iter_state {
- struct seq_net_private p;
- struct hlist_nulls_head *hash;
- unsigned int htable_size;
- unsigned int bucket;
-};
-
-static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
-{
- struct ct_iter_state *st = seq->private;
- struct hlist_nulls_node *n;
-
- for (st->bucket = 0;
- st->bucket < st->htable_size;
- st->bucket++) {
- n = rcu_dereference(
- hlist_nulls_first_rcu(&st->hash[st->bucket]));
- if (!is_a_nulls(n))
- return n;
- }
- return NULL;
-}
-
-static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
- struct hlist_nulls_node *head)
-{
- struct ct_iter_state *st = seq->private;
-
- head = rcu_dereference(hlist_nulls_next_rcu(head));
- while (is_a_nulls(head)) {
- if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= st->htable_size)
- return NULL;
- }
- head = rcu_dereference(
- hlist_nulls_first_rcu(&st->hash[st->bucket]));
- }
- return head;
-}
-
-static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
- struct hlist_nulls_node *head = ct_get_first(seq);
-
- if (head)
- while (pos && (head = ct_get_next(seq, head)))
- pos--;
- return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
-{
- struct ct_iter_state *st = seq->private;
-
- rcu_read_lock();
-
- nf_conntrack_get_ht(&st->hash, &st->htable_size);
- return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- (*pos)++;
- return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
- __releases(RCU)
-{
- rcu_read_unlock();
-}
-
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
-{
- int ret;
- u32 len;
- char *secctx;
-
- ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
- if (ret)
- return;
-
- seq_printf(s, "secctx=%s ", secctx);
-
- security_release_secctx(secctx, len);
-}
-#else
-static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
-{
-}
-#endif
-
-static bool ct_seq_should_skip(const struct nf_conn *ct,
- const struct net *net,
- const struct nf_conntrack_tuple_hash *hash)
-{
- /* we only want to print DIR_ORIGINAL */
- if (NF_CT_DIRECTION(hash))
- return true;
-
- if (nf_ct_l3num(ct) != AF_INET)
- return true;
-
- if (!net_eq(nf_ct_net(ct), net))
- return true;
-
- return false;
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
- struct nf_conntrack_tuple_hash *hash = v;
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
- const struct nf_conntrack_l3proto *l3proto;
- const struct nf_conntrack_l4proto *l4proto;
- int ret = 0;
-
- NF_CT_ASSERT(ct);
- if (ct_seq_should_skip(ct, seq_file_net(s), hash))
- return 0;
-
- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
- return 0;
-
- /* check if we raced w. object reuse */
- if (!nf_ct_is_confirmed(ct) ||
- ct_seq_should_skip(ct, seq_file_net(s), hash))
- goto release;
-
- l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- NF_CT_ASSERT(l3proto);
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- NF_CT_ASSERT(l4proto);
-
- ret = -ENOSPC;
- seq_printf(s, "%-8s %u %ld ",
- l4proto->name, nf_ct_protonum(ct),
- timer_pending(&ct->timeout)
- ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
-
- if (l4proto->print_conntrack)
- l4proto->print_conntrack(s, ct);
-
- if (seq_has_overflowed(s))
- goto release;
-
- print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- l3proto, l4proto);
-
- if (seq_has_overflowed(s))
- goto release;
-
- if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
- goto release;
-
- if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
- seq_printf(s, "[UNREPLIED] ");
-
- print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- l3proto, l4proto);
-
- if (seq_has_overflowed(s))
- goto release;
-
- if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
- goto release;
-
- if (test_bit(IPS_ASSURED_BIT, &ct->status))
- seq_printf(s, "[ASSURED] ");
-
-#ifdef CONFIG_NF_CONNTRACK_MARK
- seq_printf(s, "mark=%u ", ct->mark);
-#endif
-
- ct_show_secctx(s, ct);
-
- seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
-
- if (seq_has_overflowed(s))
- goto release;
-
- ret = 0;
-release:
- nf_ct_put(ct);
- return ret;
-}
-
-static const struct seq_operations ct_seq_ops = {
- .start = ct_seq_start,
- .next = ct_seq_next,
- .stop = ct_seq_stop,
- .show = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ct_seq_ops,
- sizeof(struct ct_iter_state));
-}
-
-static const struct file_operations ct_file_ops = {
- .owner = THIS_MODULE,
- .open = ct_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-/* expects */
-struct ct_expect_iter_state {
- struct seq_net_private p;
- unsigned int bucket;
-};
-
-static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
-{
- struct ct_expect_iter_state *st = seq->private;
- struct hlist_node *n;
-
- for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(
- hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
- if (n)
- return n;
- }
- return NULL;
-}
-
-static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
- struct hlist_node *head)
-{
- struct ct_expect_iter_state *st = seq->private;
-
- head = rcu_dereference(hlist_next_rcu(head));
- while (head == NULL) {
- if (++st->bucket >= nf_ct_expect_hsize)
- return NULL;
- head = rcu_dereference(
- hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
- }
- return head;
-}
-
-static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
-{
- struct hlist_node *head = ct_expect_get_first(seq);
-
- if (head)
- while (pos && (head = ct_expect_get_next(seq, head)))
- pos--;
- return pos ? NULL : head;
-}
-
-static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
-{
- rcu_read_lock();
- return ct_expect_get_idx(seq, *pos);
-}
-
-static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- (*pos)++;
- return ct_expect_get_next(seq, v);
-}
-
-static void exp_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
-{
- rcu_read_unlock();
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
- struct nf_conntrack_expect *exp;
- const struct hlist_node *n = v;
-
- exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
-
- if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
- return 0;
-
- if (exp->tuple.src.l3num != AF_INET)
- return 0;
-
- if (exp->timeout.function)
- seq_printf(s, "%ld ", timer_pending(&exp->timeout)
- ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
- else
- seq_printf(s, "- ");
-
- seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
-
- print_tuple(s, &exp->tuple,
- __nf_ct_l3proto_find(exp->tuple.src.l3num),
- __nf_ct_l4proto_find(exp->tuple.src.l3num,
- exp->tuple.dst.protonum));
- seq_putc(s, '\n');
-
- return 0;
-}
-
-static const struct seq_operations exp_seq_ops = {
- .start = exp_seq_start,
- .next = exp_seq_next,
- .stop = exp_seq_stop,
- .show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &exp_seq_ops,
- sizeof(struct ct_expect_iter_state));
-}
-
-static const struct file_operations ip_exp_file_ops = {
- .owner = THIS_MODULE,
- .open = exp_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct net *net = seq_file_net(seq);
- int cpu;
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return per_cpu_ptr(net->ct.stat, cpu);
- }
-
- return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct net *net = seq_file_net(seq);
- int cpu;
-
- for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return per_cpu_ptr(net->ct.stat, cpu);
- }
-
- return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
- struct net *net = seq_file_net(seq);
- unsigned int nr_conntracks = atomic_read(&net->ct.count);
- const struct ip_conntrack_stat *st = v;
-
- if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
- return 0;
- }
-
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
- "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
- nr_conntracks,
- st->searched,
- st->found,
- st->new,
- st->invalid,
- st->ignore,
- st->delete,
- st->delete_list,
- st->insert,
- st->insert_failed,
- st->drop,
- st->early_drop,
- st->error,
-
- st->expect_new,
- st->expect_create,
- st->expect_delete,
- st->search_restart
- );
- return 0;
-}
-
-static const struct seq_operations ct_cpu_seq_ops = {
- .start = ct_cpu_seq_start,
- .next = ct_cpu_seq_next,
- .stop = ct_cpu_seq_stop,
- .show = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &ct_cpu_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
- .owner = THIS_MODULE,
- .open = ct_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-static int __net_init ip_conntrack_net_init(struct net *net)
-{
- struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-
- proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
- if (!proc)
- goto err1;
-
- proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
- &ip_exp_file_ops);
- if (!proc_exp)
- goto err2;
-
- proc_stat = proc_create("ip_conntrack", S_IRUGO,
- net->proc_net_stat, &ct_cpu_seq_fops);
- if (!proc_stat)
- goto err3;
- return 0;
-
-err3:
- remove_proc_entry("ip_conntrack_expect", net->proc_net);
-err2:
- remove_proc_entry("ip_conntrack", net->proc_net);
-err1:
- return -ENOMEM;
-}
-
-static void __net_exit ip_conntrack_net_exit(struct net *net)
-{
- remove_proc_entry("ip_conntrack", net->proc_net_stat);
- remove_proc_entry("ip_conntrack_expect", net->proc_net);
- remove_proc_entry("ip_conntrack", net->proc_net);
-}
-
-static struct pernet_operations ip_conntrack_net_ops = {
- .init = ip_conntrack_net_init,
- .exit = ip_conntrack_net_exit,
-};
-
-int __init nf_conntrack_ipv4_compat_init(void)
-{
- return register_pernet_subsys(&ip_conntrack_net_ops);
-}
-
-void __exit nf_conntrack_ipv4_compat_fini(void)
-{
- unregister_pernet_subsys(&ip_conntrack_net_ops);
-}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index c567e1b5d799..4b5904bc2614 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -327,17 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table icmp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_icmp_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -355,40 +344,14 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
- sizeof(icmp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &in->timeout;
-#endif
-#endif
- return 0;
-}
-
static int icmp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_icmp_net *in = icmp_pernet(net);
struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmp_timeout;
- ret = icmp_kmemdup_compat_sysctl_table(pn, in);
- if (ret < 0)
- return ret;
-
- ret = icmp_kmemdup_sysctl_table(pn, in);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
-
- return ret;
+ return icmp_kmemdup_sysctl_table(pn, in);
}
static struct nf_proto_net *icmp_get_net_proto(struct net *net)
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index ceb187308120..cf986e1c7bbd 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -74,21 +74,19 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
nf_conntrack_get(skb->nfct);
#endif
/*
- * If we are in PREROUTING/INPUT, the checksum must be recalculated
- * since the length could have changed as a result of defragmentation.
- *
- * We also decrease the TTL to mitigate potential loops between two
- * hosts.
+ * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
+ * loops between two hosts.
*
* Set %IP_DF so that the original source is notified of a potentially
* decreased MTU on the clone route. IPv6 does this too.
+ *
+ * IP header checksum will be recalculated at ip_local_out.
*/
iph = ip_hdr(skb);
iph->frag_off |= htons(IP_DF);
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN)
--iph->ttl;
- ip_send_check(iph);
if (nf_dup_ipv4_route(net, skb, gw, oif)) {
__this_cpu_write(nf_skb_duplicated, true);
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index e7ad950cf9ef..8945c2653814 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -62,7 +62,7 @@ static void dump_arp_packet(struct nf_log_buf *m,
/* If it's for Ethernet and the lengths are OK, then log the ARP
* payload.
*/
- if (ah->ar_hrd != htons(1) ||
+ if (ah->ar_hrd != htons(ARPHRD_ETHER) ||
ah->ar_hln != ETH_ALEN ||
ah->ar_pln != sizeof(__be32))
return;
@@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = {
static int __net_init nf_log_arp_net_init(struct net *net)
{
- nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
- return 0;
+ return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
}
static void __net_exit nf_log_arp_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 076aadda0473..20f225593a8b 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = {
static int __net_init nf_log_ipv4_net_init(struct net *net)
{
- nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
- return 0;
+ return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
}
static void __net_exit nf_log_ipv4_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 8dd869642f45..c1bcf699a23d 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
static int __net_init nf_log_ipv6_net_init(struct net *net)
{
- nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
- return 0;
+ return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
}
static void __net_exit nf_log_ipv6_net_exit(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9266ceebd112..e8d56d9a4df2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -474,6 +474,12 @@ config NFT_META
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
+config NFT_NUMGEN
+ tristate "Netfilter nf_tables number generator module"
+ help
+ This option adds the number generator expression used to perform
+ incremental counting and random numbers bound to a upper limit.
+
config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
@@ -481,13 +487,13 @@ config NFT_CT
This option adds the "meta" expression that you can use to match
connection tracking information such as the flow state.
-config NFT_RBTREE
+config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
-config NFT_HASH
+config NFT_SET_HASH
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
@@ -542,6 +548,12 @@ config NFT_QUEUE
This is required if you intend to use the userspace queueing
infrastructure (also known as NFQUEUE) from nftables.
+config NFT_QUOTA
+ tristate "Netfilter nf_tables quota module"
+ help
+ This option adds the "quota" expression that you can use to match
+ enforce bytes quotas.
+
config NFT_REJECT
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
@@ -563,6 +575,12 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+config NFT_HASH
+ tristate "Netfilter nf_tables hash module"
+ help
+ This option adds the "hash" expression that you can use to perform
+ a hash operation on registers.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 69134541d65b..0c8581100ac6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -80,18 +80,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
+obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
-obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
+obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index f04fd8df210b..fc230d99aa3b 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
- /* Show what happens instead of calling nf_ct_kill() */
- if (del_timer(&ct->timeout)) {
- IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
+ if (nf_ct_kill(ct)) {
+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&tuple));
- if (ct->timeout.function)
- ct->timeout.function(ct->timeout.data);
} else {
IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
FMT_TUPLE "\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index dd2c43abf9e2..ac1db4019d5c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 last_bucket;
+ bool exiting;
+};
+
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+#define GC_MAX_BUCKETS_DIV 64u
+#define GC_MAX_BUCKETS 8192u
+#define GC_INTERVAL (5 * HZ)
+#define GC_MAX_EVICTS 256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
spin_lock(lock);
@@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -372,7 +384,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
- NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -435,35 +446,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
+ if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ return false;
+
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_get_real_ns();
- if (nf_ct_is_dying(ct))
- goto delete;
-
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
- /* destroy event was not delivered */
+ /* destroy event was not delivered. nf_ct_put will
+ * be done by event cache worker on redelivery.
+ */
nf_ct_delete_from_lists(ct);
nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
nf_conntrack_ecache_work(nf_ct_net(ct));
- set_bit(IPS_DYING_BIT, &ct->status);
- delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
@@ -481,22 +487,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
{
- struct hlist_nulls_head *hptr;
- unsigned int sequence, hsz;
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return;
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hsz = nf_conntrack_htable_size;
- hptr = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ if (nf_ct_should_gc(ct))
+ nf_ct_kill(ct);
- *hash = hptr;
- *hsize = hsz;
+ nf_ct_put(ct);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
/*
* Warning :
@@ -510,16 +511,24 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket, sequence;
+ unsigned int bucket, hsize;
begin:
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- bucket = scale_hash(hash);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ bucket = reciprocal_scale(hash, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ struct nf_conn *ct;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
return h;
@@ -618,7 +627,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone, net))
goto out;
- add_timer(&ct->timeout);
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
@@ -771,8 +779,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
+ ct->timeout += nfct_time_stamp;
atomic_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
@@ -823,29 +830,41 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
- unsigned int hash, sequence;
+ unsigned int hash, hsize;
struct hlist_nulls_node *n;
struct nf_conn *ct;
zone = nf_ct_zone(ignored_conntrack);
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = hash_conntrack(net, tuple);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = __hash_conntrack(net, tuple, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (ct != ignored_conntrack &&
- nf_ct_key_equal(h, tuple, zone, net)) {
+
+ if (ct == ignored_conntrack)
+ continue;
+
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
}
NF_CT_STAT_INC_ATOMIC(net, searched);
}
+
+ if (get_nulls_value(n) != hash) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
rcu_read_unlock();
return 0;
@@ -867,6 +886,11 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+ }
+
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
@@ -884,7 +908,6 @@ static unsigned int early_drop_list(struct net *net,
*/
if (net_eq(nf_ct_net(tmp), net) &&
nf_ct_is_confirmed(tmp) &&
- del_timer(&tmp->timeout) &&
nf_ct_delete(tmp, 0, 0))
drops++;
@@ -900,14 +923,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned hash, sequence, drops;
+ unsigned int hash, hsize, drops;
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = scale_hash(_hash++);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = reciprocal_scale(_hash++, hsize);
drops = early_drop_list(net, &ct_hash[hash]);
rcu_read_unlock();
@@ -921,6 +941,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false;
}
+static void gc_worker(struct work_struct *work)
+{
+ unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned long next_run = GC_INTERVAL;
+ unsigned int ratio, scanned = 0;
+ struct conntrack_gc_work *gc_work;
+
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ i = gc_work->last_bucket;
+
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ unsigned int hashsz;
+ struct nf_conn *tmp;
+
+ i++;
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&ct_hash, &hashsz);
+ if (i >= hashsz)
+ i = 0;
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+ continue;
+ }
+ }
+
+ /* could check get_nulls_value() here and restart if ct
+ * was moved to another chain. But given gc is best-effort
+ * we will just continue with next hash slot.
+ */
+ rcu_read_unlock();
+ cond_resched_rcu_qs();
+ } while (++buckets < goal &&
+ expired_count < GC_MAX_EVICTS);
+
+ if (gc_work->exiting)
+ return;
+
+ ratio = scanned ? expired_count * 100 / scanned : 0;
+ if (ratio >= 90)
+ next_run = 0;
+
+ gc_work->last_bucket = i;
+ schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+}
+
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -957,8 +1040,6 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
- /* Don't set timer yet: wait for confirmation */
- setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
@@ -1332,7 +1413,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
/* Only update if this is not a fixed timeout */
@@ -1340,39 +1420,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
goto acct;
/* If not in hash table, timer will not be active yet */
- if (!nf_ct_is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- } else {
- unsigned long newtime = jiffies + extra_jiffies;
-
- /* Only update the timeout if the new timeout is at least
- HZ jiffies from the old timeout. Need del_timer for race
- avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ)
- mod_timer_pending(&ct->timeout, newtime);
- }
+ if (nf_ct_is_confirmed(ct))
+ extra_jiffies += nfct_time_stamp;
+ ct->timeout = extra_jiffies;
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
-bool __nf_ct_kill_acct(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
{
- if (do_acct)
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, ctinfo, skb->len);
- if (del_timer(&ct->timeout)) {
- ct->timeout.function((unsigned long)ct);
- return true;
- }
- return false;
+ return nf_ct_delete(ct, 0, 0);
}
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -1505,11 +1571,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, portid, report);
-
- /* ... else the timer will get him soon. */
+ nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
@@ -1545,6 +1608,7 @@ static int untrack_refs(void)
void nf_conntrack_cleanup_start(void)
{
+ conntrack_gc_work.exiting = true;
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
@@ -1554,6 +1618,7 @@ void nf_conntrack_cleanup_end(void)
while (untrack_refs() > 0)
schedule();
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
nf_conntrack_proto_fini();
@@ -1828,6 +1893,10 @@ int nf_conntrack_init_start(void)
}
/* - and look it like as a confirmed connection */
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+ conntrack_gc_work_init(&conntrack_gc_work);
+ schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
return 0;
err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d28011b42845..da9df2d56e66 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ struct nf_conntrack_ecache *e;
- if (nf_ct_is_dying(ct))
+ if (!nf_ct_is_confirmed(ct))
+ continue;
+
+ e = nf_ct_ecache_find(ct);
+ if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
@@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
break;
}
- /* we've got the event delivered, now it's dying */
- set_bit(IPS_DYING_BIT, &ct->status);
+ e->state = NFCT_ECACHE_DESTROY_SENT;
refs[evicted] = ct;
if (++evicted >= ARRAY_SIZE(refs)) {
@@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
if (!e)
goto out_unlock;
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+ if (nf_ct_is_confirmed(ct)) {
struct nf_ct_event item = {
.ct = ct,
.portid = e->portid ? e->portid : portid,
@@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
* triggered by a process, we store the PORTID
* to include it in the retransmission.
*/
- if (eventmask & (1 << IPCT_DESTROY) &&
- e->portid == 0 && portid != 0)
- e->portid = portid;
- else
+ if (eventmask & (1 << IPCT_DESTROY)) {
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ e->state = NFCT_ECACHE_DESTROY_FAIL;
+ } else {
e->missed |= eventmask;
+ }
} else {
e->missed &= ~missed;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 43147005bea3..b6934b5edf7a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
}
delim = data[0];
if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
- pr_debug("try_eprt: invalid delimitter.\n");
+ pr_debug("try_eprt: invalid delimiter.\n");
return 0;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fdfc71f416b7..c052b712c49f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -149,10 +149,7 @@ nla_put_failure:
static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
{
- long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
-
- if (timeout < 0)
- timeout = 0;
+ long timeout = nf_ct_expires(ct) / HZ;
if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
goto nla_put_failure;
@@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
+ struct nf_conn *nf_ct_evict[8];
+ int res, i;
spinlock_t *lockp;
last = (struct nf_conn *)cb->args[1];
+ i = 0;
local_bh_disable();
for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
if (cb->args[0] >= nf_conntrack_htable_size) {
@@ -837,6 +843,13 @@ restart:
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ if (i < ARRAY_SIZE(nf_ct_evict) &&
+ atomic_inc_not_zero(&ct->ct_general.use))
+ nf_ct_evict[i++] = ct;
+ continue;
+ }
+
if (!net_eq(net, nf_ct_net(ct)))
continue;
@@ -878,6 +891,13 @@ out:
if (last)
nf_ct_put(last);
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
return skb->len;
}
@@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
}
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
-
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
nf_ct_put(ct);
return 0;
@@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
{
u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- if (!del_timer(&ct->timeout))
- return -ETIME;
+ ct->timeout = nfct_time_stamp + timeout * HZ;
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
return 0;
}
@@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+ ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
rcu_read_lock();
if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5588c7ae1ac2..f60a4755d71e 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
sibling->proto.gre.timeout = 0;
sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
+ nf_ct_kill(sibling);
nf_ct_put(sibling);
return 1;
} else {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b65d5864b6d9..8d2c7d8c666a 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
nf_ct_l3num(i) == l4proto->l3proto;
}
-static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- if (l3proto->l3proto == PF_INET)
- return &net->ct.nf_ct_proto;
- else
- return NULL;
-}
-
-static int nf_ct_l3proto_register_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- int err = 0;
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
- /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
- if (in == NULL)
- return 0;
-
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table != NULL) {
- err = nf_ct_register_sysctl(net,
- &in->ctl_table_header,
- l3proto->ctl_table_path,
- in->ctl_table);
- if (err < 0) {
- kfree(in->ctl_table);
- in->ctl_table = NULL;
- }
- }
-#endif
- return err;
-}
-
-static void nf_ct_l3proto_unregister_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-
- if (in == NULL)
- return;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table_header != NULL)
- nf_ct_unregister_sysctl(&in->ctl_table_header,
- &in->ctl_table,
- 0);
-#endif
-}
-
int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
@@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
int nf_ct_l3proto_pernet_register(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- int ret = 0;
+ int ret;
if (proto->init_net) {
ret = proto->init_net(net);
@@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net,
return ret;
}
- return nf_ct_l3proto_register_sysctl(net, proto);
+ return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
@@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
void nf_ct_l3proto_pernet_unregister(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- nf_ct_l3proto_unregister_sysctl(net, proto);
-
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
@@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
}
}
}
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
- if (err < 0) {
- nf_ct_kfree_compat_sysctl_table(pn);
- goto out;
- }
- err = nf_ct_register_sysctl(net,
- &pn->ctl_compat_header,
- "net/ipv4/netfilter",
- pn->ctl_compat_table);
- if (err == 0)
- goto out;
-
- nf_ct_kfree_compat_sysctl_table(pn);
- nf_ct_unregister_sysctl(&pn->ctl_table_header,
- &pn->ctl_table,
- pn->users);
- }
-out:
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
return err;
}
@@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
pn->users);
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
- nf_ct_unregister_sysctl(&pn->ctl_compat_header,
- &pn->ctl_compat_table,
- 0);
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 399a38fd685a..a45bee52dccc 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
{
struct dccp_hdr _hdr, *dh;
- dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (dh == NULL)
return false;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 86dc752e5349..d5868bad33a7 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table generic_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_generic_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_generic_net *gn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table,
- sizeof(generic_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &gn->timeout;
-#endif
-#endif
- return 0;
-}
-
static int generic_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_generic_net *gn = generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
gn->timeout = nf_ct_generic_timeout;
- ret = generic_kmemdup_compat_sysctl_table(pn, gn);
- if (ret < 0)
- return ret;
-
- ret = generic_kmemdup_sysctl_table(pn, gn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
-
- return ret;
+ return generic_kmemdup_sysctl_table(pn, gn);
}
static struct nf_proto_net *generic_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1d7ab960a9e6..982ea62606c7 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
const struct sctphdr *hp;
struct sctphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = {
},
{ }
};
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table sctp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_sctp_timeout_closed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_cookie_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif
static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct sctp_net *sn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
- sizeof(sctp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
- pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
- pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
- pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
- pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
- pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
- pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
-#endif
-#endif
- return 0;
-}
-
static int sctp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
@@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
sn->timeouts[i] = sctp_timeouts[i];
}
- if (proto == AF_INET) {
- ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
- if (ret < 0)
- return ret;
-
- ret = sctp_kmemdup_sysctl_table(pn, sn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = sctp_kmemdup_sysctl_table(pn, sn);
-
- return ret;
+ return sctp_kmemdup_sysctl_table(pn, sn);
}
static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70c8381641a7..69f687740c76 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
const struct tcphdr *hp;
struct tcphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = {
},
{ }
};
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table tcp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_tcp_timeout_syn_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_syn_sent2",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_syn_recv",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_fin_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_close_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_last_ack",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_time_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_close",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_timeout_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_tcp_loose",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_tcp_be_liberal",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ip_conntrack_tcp_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_tcp_net *tn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
- sizeof(tcp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
- pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
- pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
- pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
- pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
- pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
- pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
- pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
- pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
- pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
- pn->ctl_compat_table[10].data = &tn->tcp_loose;
- pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
- pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
-#endif
-#endif
- return 0;
-}
-
static int tcp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_proto_net *pn = &tn->pn;
@@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- if (proto == AF_INET) {
- ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
- if (ret < 0)
- return ret;
-
- ret = tcp_kmemdup_sysctl_table(pn, tn);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = tcp_kmemdup_sysctl_table(pn, tn);
-
- return ret;
+ return tcp_kmemdup_sysctl_table(pn, tn);
}
static struct nf_proto_net *tcp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 4fd040575ffe..20f35ed68030 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb,
const struct udphdr *hp;
struct udphdr _hdr;
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
@@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = {
},
{ }
};
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table udp_compat_sysctl_table[] = {
- {
- .procname = "ip_conntrack_udp_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "ip_conntrack_udp_timeout_stream",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
- struct nf_udp_net *un)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table,
- sizeof(udp_compat_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_compat_table)
- return -ENOMEM;
-
- pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
- pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
-#endif
- return 0;
-}
-
static int udp_init_net(struct net *net, u_int16_t proto)
{
- int ret;
struct nf_udp_net *un = udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
@@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto)
un->timeouts[i] = udp_timeouts[i];
}
- if (proto == AF_INET) {
- ret = udp_kmemdup_compat_sysctl_table(pn, un);
- if (ret < 0)
- return ret;
-
- ret = udp_kmemdup_sysctl_table(pn, un);
- if (ret < 0)
- nf_ct_kfree_compat_sysctl_table(pn);
- } else
- ret = udp_kmemdup_sysctl_table(pn, un);
-
- return ret;
+ return udp_kmemdup_sysctl_table(pn, un);
}
static struct nf_proto_net *udp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 9d692f5adb94..029206e8dec4 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
const struct udphdr *hp;
struct udphdr _hdr;
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ /* Actually only need first 4 bytes to get ports. */
+ hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9f267c3ffb39..3d9a316a3c77 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -228,8 +228,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
seq_printf(s, "%-8s %u %-8s %u %ld ",
l3proto->name, nf_ct_l3num(ct),
l4proto->name, nf_ct_protonum(ct),
- timer_pending(&ct->timeout)
- ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+ nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
l4proto->print_conntrack(s, ct);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a16713..30a17d649a83 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
return NULL;
}
-void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
{
const struct nf_logger *log;
- if (pf == NFPROTO_UNSPEC)
- return;
+ if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+ return -EOPNOTSUPP;
mutex_lock(&nf_log_mutex);
log = nft_log_dereference(net->nf.nf_loggers[pf]);
@@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
mutex_unlock(&nf_log_mutex);
+
+ return 0;
}
EXPORT_SYMBOL(nf_log_set);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index de31818417b8..81ae41f85d3a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -565,16 +565,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- if (!del_timer(&ct->timeout))
- return 1;
-
ct->status &= ~IPS_NAT_DONE_MASK;
-
rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
nf_nat_bysource_params);
- add_timer(&ct->timeout);
-
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
*/
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7e1c876c7608..bd9715e5ff26 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
+struct nft_chain_hook {
+ u32 num;
+ u32 priority;
+ const struct nf_chain_type *type;
+ struct net_device *dev;
+};
+
+static int nft_chain_parse_hook(struct net *net,
+ const struct nlattr * const nla[],
+ struct nft_af_info *afi,
+ struct nft_chain_hook *hook, bool create)
+{
+ struct nlattr *ha[NFTA_HOOK_MAX + 1];
+ const struct nf_chain_type *type;
+ struct net_device *dev;
+ int err;
+
+ err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+ nft_hook_policy);
+ if (err < 0)
+ return err;
+
+ if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+ ha[NFTA_HOOK_PRIORITY] == NULL)
+ return -EINVAL;
+
+ hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+ if (hook->num >= afi->nhooks)
+ return -EINVAL;
+
+ hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+
+ type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+ if (nla[NFTA_CHAIN_TYPE]) {
+ type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
+ create);
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+ }
+ if (!(type->hook_mask & (1 << hook->num)))
+ return -EOPNOTSUPP;
+ if (!try_module_get(type->owner))
+ return -ENOENT;
+
+ hook->type = type;
+
+ hook->dev = NULL;
+ if (afi->flags & NFT_AF_NEEDS_DEV) {
+ char ifname[IFNAMSIZ];
+
+ if (!ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+ dev = dev_get_by_name(net, ifname);
+ if (!dev) {
+ module_put(type->owner);
+ return -ENOENT;
+ }
+ hook->dev = dev;
+ } else if (ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void nft_chain_release_hook(struct nft_chain_hook *hook)
+{
+ module_put(hook->type->owner);
+ if (hook->dev != NULL)
+ dev_put(hook->dev);
+}
+
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
- struct nlattr *ha[NFTA_HOOK_MAX + 1];
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
@@ -1273,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_base_chain *basechain;
+ struct nft_chain_hook hook;
+ struct nf_hook_ops *ops;
+
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(net, nla, afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
if (nla[NFTA_CHAIN_HANDLE] && name) {
struct nft_chain *chain2;
@@ -1320,102 +1426,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOVERFLOW;
if (nla[NFTA_CHAIN_HOOK]) {
- const struct nf_chain_type *type;
+ struct nft_chain_hook hook;
struct nf_hook_ops *ops;
nf_hookfn *hookfn;
- u32 hooknum, priority;
-
- type = chain_type[family][NFT_CHAIN_T_DEFAULT];
- if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(afi,
- nla[NFTA_CHAIN_TYPE],
- create);
- if (IS_ERR(type))
- return PTR_ERR(type);
- }
- err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
- nft_hook_policy);
+ err = nft_chain_parse_hook(net, nla, afi, &hook, create);
if (err < 0)
return err;
- if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
- ha[NFTA_HOOK_PRIORITY] == NULL)
- return -EINVAL;
-
- hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
- if (hooknum >= afi->nhooks)
- return -EINVAL;
- priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
-
- if (!(type->hook_mask & (1 << hooknum)))
- return -EOPNOTSUPP;
- if (!try_module_get(type->owner))
- return -ENOENT;
- hookfn = type->hooks[hooknum];
-
- if (afi->flags & NFT_AF_NEEDS_DEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = dev_get_by_name(net, ifname);
- if (!dev) {
- module_put(type->owner);
- return -ENOENT;
- }
- } else if (ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
- module_put(type->owner);
- if (dev != NULL)
- dev_put(dev);
+ nft_chain_release_hook(&hook);
return -ENOMEM;
}
- if (dev != NULL)
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+ if (hook.dev != NULL)
+ strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
- module_put(type->owner);
+ nft_chain_release_hook(&hook);
kfree(basechain);
- if (dev != NULL)
- dev_put(dev);
return PTR_ERR(stats);
}
basechain->stats = stats;
} else {
stats = netdev_alloc_pcpu_stats(struct nft_stats);
if (stats == NULL) {
- module_put(type->owner);
+ nft_chain_release_hook(&hook);
kfree(basechain);
- if (dev != NULL)
- dev_put(dev);
return -ENOMEM;
}
rcu_assign_pointer(basechain->stats, stats);
}
- basechain->type = type;
+ hookfn = hook.type->hooks[hook.num];
+ basechain->type = hook.type;
chain = &basechain->chain;
for (i = 0; i < afi->nops; i++) {
ops = &basechain->ops[i];
ops->pf = family;
- ops->hooknum = hooknum;
- ops->priority = priority;
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
ops->priv = chain;
ops->hook = afi->hooks[ops->hooknum];
- ops->dev = dev;
+ ops->dev = hook.dev;
if (hookfn)
ops->hook = hookfn;
if (afi->hook_ops_init)
@@ -3426,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
}
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
- const struct nlattr *attr)
+ const struct nlattr *attr, u32 nlmsg_flags)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc d1, d2;
struct nft_set_ext_tmpl tmpl;
- struct nft_set_ext *ext;
+ struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
struct nft_userdata *udata;
@@ -3558,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err4;
ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
- err = set->ops->insert(ctx->net, set, &elem);
- if (err < 0)
+ err = set->ops->insert(ctx->net, set, &elem, &ext2);
+ if (err) {
+ if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+ memcmp(nft_set_ext_data(ext),
+ nft_set_ext_data(ext2), set->dlen) != 0)
+ err = -EBUSY;
+ else if (!(nlmsg_flags & NLM_F_EXCL))
+ err = 0;
+ }
goto err5;
+ }
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -3616,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
!atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
return -ENFILE;
- err = nft_add_set_elem(&ctx, set, attr);
+ err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
if (err < 0) {
atomic_dec(&set->nelems);
break;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 564fa7929ed5..764251d31e46 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,395 +1,136 @@
/*
- * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/log2.h>
-#include <linux/jhash.h>
#include <linux/netlink.h>
-#include <linux/workqueue.h>
-#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-
-/* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
struct nft_hash {
- struct rhashtable ht;
- struct delayed_work gc_work;
-};
-
-struct nft_hash_elem {
- struct rhash_head node;
- struct nft_set_ext ext;
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ u8 len;
+ u32 modulus;
+ u32 seed;
};
-struct nft_hash_cmp_arg {
- const struct nft_set *set;
- const u32 *key;
- u8 genmask;
-};
-
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
-{
- const struct nft_hash_cmp_arg *arg = data;
-
- return jhash(arg->key, len, seed);
-}
-
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
-{
- const struct nft_hash_elem *he = data;
-
- return jhash(nft_set_ext_key(&he->ext), len, seed);
-}
-
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
- const void *ptr)
-{
- const struct nft_hash_cmp_arg *x = arg->key;
- const struct nft_hash_elem *he = ptr;
-
- if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
- return 1;
- if (nft_set_elem_expired(&he->ext))
- return 1;
- if (!nft_set_elem_active(&he->ext, x->genmask))
- return 1;
- return 0;
-}
-
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
-{
- struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_cur(net),
- .set = set,
- .key = key,
- };
-
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- *ext = &he->ext;
-
- return !!he;
-}
-
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
- void *(*new)(struct nft_set *,
- const struct nft_expr *,
- struct nft_regs *regs),
- const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_set_ext **ext)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = NFT_GENMASK_ANY,
- .set = set,
- .key = key,
- };
-
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- goto out;
-
- he = new(set, expr, regs);
- if (he == NULL)
- goto err1;
- if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params))
- goto err2;
-out:
- *ext = &he->ext;
- return true;
-
-err2:
- nft_set_elem_destroy(set, he);
-err1:
- return false;
-}
-
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(net),
- .set = set,
- .key = elem->key.val.data,
- };
-
- return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params);
-}
-
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_hash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash *priv = nft_expr_priv(expr);
+ const void *data = &regs->data[priv->sreg];
- nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
+ regs->data[priv->dreg] =
+ reciprocal_scale(jhash(data, priv->len, priv->seed),
+ priv->modulus);
}
-static void *nft_hash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(net),
- .set = set,
- .key = elem->key.val.data,
- };
-
- rcu_read_lock();
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL) {
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext))
- nft_set_elem_change_active(net, set, &he->ext);
- else
- he = NULL;
- }
- rcu_read_unlock();
-
- return he;
-}
-
-static void nft_hash_remove(const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
-
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-}
-
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
- struct nft_set_iter *iter)
-{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct rhashtable_iter hti;
- struct nft_set_elem elem;
- int err;
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- iter->err = err;
- if (err)
- return;
-
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
-
- while ((he = rhashtable_walk_next(&hti))) {
- if (IS_ERR(he)) {
- err = PTR_ERR(he);
- if (err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
-
- continue;
- }
-
- if (iter->count < iter->skip)
- goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
-
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
- goto out;
-
-cont:
- iter->count++;
- }
-
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-}
-
-static void nft_hash_gc(struct work_struct *work)
-{
- struct nft_set *set;
- struct nft_hash_elem *he;
- struct nft_hash *priv;
- struct nft_set_gc_batch *gcb = NULL;
- struct rhashtable_iter hti;
- int err;
-
- priv = container_of(work, struct nft_hash, gc_work.work);
- set = nft_set_container_of(priv);
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- if (err)
- goto schedule;
-
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN)
- goto out;
-
- while ((he = rhashtable_walk_next(&hti))) {
- if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- goto out;
- continue;
- }
-
- if (!nft_set_elem_expired(&he->ext))
- continue;
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
-
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- goto out;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
- }
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-
- nft_set_gc_batch_complete(gcb);
-schedule:
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
-}
-
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
-{
- return sizeof(struct nft_hash);
-}
-
-static const struct rhashtable_params nft_hash_params = {
- .head_offset = offsetof(struct nft_hash_elem, node),
- .hashfn = nft_hash_key,
- .obj_hashfn = nft_hash_obj,
- .obj_cmpfn = nft_hash_cmp,
- .automatic_shrinking = true,
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+ [NFTA_HASH_SREG] = { .type = NLA_U32 },
+ [NFTA_HASH_DREG] = { .type = NLA_U32 },
+ [NFTA_HASH_LEN] = { .type = NLA_U32 },
+ [NFTA_HASH_MODULUS] = { .type = NLA_U32 },
+ [NFTA_HASH_SEED] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_set *set,
- const struct nft_set_desc *desc,
+static int nft_hash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_set_priv(set);
- struct rhashtable_params params = nft_hash_params;
- int err;
+ struct nft_hash *priv = nft_expr_priv(expr);
+ u32 len;
- params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
- params.key_len = set->klen;
+ if (!tb[NFTA_HASH_SREG] ||
+ !tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_LEN] ||
+ !tb[NFTA_HASH_SEED] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
- err = rhashtable_init(&priv->ht, &params);
- if (err < 0)
- return err;
+ priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
- return 0;
-}
+ len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
+ if (len == 0 || len > U8_MAX)
+ return -ERANGE;
-static void nft_hash_elem_destroy(void *ptr, void *arg)
-{
- nft_set_elem_destroy((const struct nft_set *)arg, ptr);
-}
+ priv->len = len;
-static void nft_hash_destroy(const struct nft_set *set)
-{
- struct nft_hash *priv = nft_set_priv(set);
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
- cancel_delayed_work_sync(&priv->gc_work);
- rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
- (void *)set);
+ priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+
+ return nft_validate_register_load(priv->sreg, len) &&
+ nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
}
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
- struct nft_set_estimate *est)
+static int nft_hash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
- unsigned int esize;
+ const struct nft_hash *priv = nft_expr_priv(expr);
- esize = sizeof(struct nft_hash_elem);
- if (desc->size) {
- est->size = sizeof(struct nft_hash) +
- roundup_pow_of_two(desc->size * 4 / 3) *
- sizeof(struct nft_hash_elem *) +
- desc->size * esize;
- } else {
- /* Resizing happens when the load drops below 30% or goes
- * above 75%. The average of 52.5% load (approximated by 50%)
- * is used for the size estimation of the hash buckets,
- * meaning we calculate two buckets per element.
- */
- est->size = esize + 2 * sizeof(struct nft_hash_elem *);
- }
+ if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+ goto nla_put_failure;
- est->class = NFT_SET_CLASS_O_1;
+ return 0;
- return true;
+nla_put_failure:
+ return -1;
}
-static struct nft_set_ops nft_hash_ops __read_mostly = {
- .privsize = nft_hash_privsize,
- .elemsize = offsetof(struct nft_hash_elem, ext),
- .estimate = nft_hash_estimate,
+static struct nft_expr_type nft_hash_type;
+static const struct nft_expr_ops nft_hash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+ .eval = nft_hash_eval,
.init = nft_hash_init,
- .destroy = nft_hash_destroy,
- .insert = nft_hash_insert,
- .activate = nft_hash_activate,
- .deactivate = nft_hash_deactivate,
- .remove = nft_hash_remove,
- .lookup = nft_hash_lookup,
- .update = nft_hash_update,
- .walk = nft_hash_walk,
- .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .dump = nft_hash_dump,
+};
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+ .name = "hash",
+ .ops = &nft_hash_ops,
+ .policy = nft_hash_policy,
+ .maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
};
static int __init nft_hash_module_init(void)
{
- return nft_register_set(&nft_hash_ops);
+ return nft_register_expr(&nft_hash_type);
}
static void __exit nft_hash_module_exit(void)
{
- nft_unregister_set(&nft_hash_ops);
+ nft_unregister_expr(&nft_hash_type);
}
module_init(nft_hash_module_init);
module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644
index 000000000000..294745ecb0fc
--- /dev/null
+++ b/net/netfilter/nft_numgen.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+
+struct nft_ng_inc {
+ enum nft_registers dreg:8;
+ u32 until;
+ atomic_t counter;
+};
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_ng_inc *priv = nft_expr_priv(expr);
+ u32 nval, oval;
+
+ do {
+ oval = atomic_read(&priv->counter);
+ nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+ } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+
+ memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32));
+}
+
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+ [NFTA_NG_DREG] = { .type = NLA_U32 },
+ [NFTA_NG_UNTIL] = { .type = NLA_U32 },
+ [NFTA_NG_TYPE] = { .type = NLA_U32 },
+};
+
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+ if (priv->until == 0)
+ return -ERANGE;
+
+ priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+ atomic_set(&priv->counter, 0);
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+ u32 until, enum nft_ng_types type)
+{
+ if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+}
+
+struct nft_ng_random {
+ enum nft_registers dreg:8;
+ u32 until;
+};
+
+static void nft_ng_random_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_ng_random *priv = nft_expr_priv(expr);
+ struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+
+ regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
+ priv->until);
+}
+
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+ if (priv->until == 0)
+ return -ERANGE;
+
+ prandom_init_once(&nft_numgen_prandom_state);
+
+ priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+}
+
+static struct nft_expr_type nft_ng_type;
+static const struct nft_expr_ops nft_ng_inc_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+ .eval = nft_ng_inc_eval,
+ .init = nft_ng_inc_init,
+ .dump = nft_ng_inc_dump,
+};
+
+static const struct nft_expr_ops nft_ng_random_ops = {
+ .type = &nft_ng_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+ .eval = nft_ng_random_eval,
+ .init = nft_ng_random_init,
+ .dump = nft_ng_random_dump,
+};
+
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_NG_DREG] ||
+ !tb[NFTA_NG_UNTIL] ||
+ !tb[NFTA_NG_TYPE])
+ return ERR_PTR(-EINVAL);
+
+ type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE]));
+
+ switch (type) {
+ case NFT_NG_INCREMENTAL:
+ return &nft_ng_inc_ops;
+ case NFT_NG_RANDOM:
+ return &nft_ng_random_ops;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ng_type __read_mostly = {
+ .name = "numgen",
+ .select_ops = &nft_ng_select_ops,
+ .policy = nft_ng_policy,
+ .maxattr = NFTA_NG_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_ng_module_init(void)
+{
+ return nft_register_expr(&nft_ng_type);
+}
+
+static void __exit nft_ng_module_exit(void)
+{
+ nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644
index 000000000000..6eafbf987ed9
--- /dev/null
+++ b/net/netfilter/nft_quota.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_quota {
+ u64 quota;
+ bool invert;
+ atomic64_t remain;
+};
+
+static inline long nft_quota(struct nft_quota *priv,
+ const struct nft_pktinfo *pkt)
+{
+ return atomic64_sub_return(pkt->skb->len, &priv->remain);
+}
+
+static void nft_quota_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ if (nft_quota(priv, pkt) < 0 && !priv->invert)
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+ [NFTA_QUOTA_BYTES] = { .type = NLA_U64 },
+ [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 },
+};
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+ u32 flags = 0;
+ u64 quota;
+
+ if (!tb[NFTA_QUOTA_BYTES])
+ return -EINVAL;
+
+ quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+ if (quota > S64_MAX)
+ return -EOVERFLOW;
+
+ if (tb[NFTA_QUOTA_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+ if (flags & ~NFT_QUOTA_F_INV)
+ return -EINVAL;
+ }
+
+ priv->quota = quota;
+ priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
+ atomic64_set(&priv->remain, quota);
+
+ return 0;
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_quota *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+
+ if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+ NFTA_QUOTA_PAD) ||
+ nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_quota_type;
+static const struct nft_expr_ops nft_quota_ops = {
+ .type = &nft_quota_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+ .eval = nft_quota_eval,
+ .init = nft_quota_init,
+ .dump = nft_quota_dump,
+};
+
+static struct nft_expr_type nft_quota_type __read_mostly = {
+ .name = "quota",
+ .ops = &nft_quota_ops,
+ .policy = nft_quota_policy,
+ .maxattr = NFTA_QUOTA_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_quota_module_init(void)
+{
+ return nft_register_expr(&nft_quota_type);
+}
+
+static void __exit nft_quota_module_exit(void)
+{
+ nft_unregister_expr(&nft_quota_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
new file mode 100644
index 000000000000..3794cb2fc788
--- /dev/null
+++ b/net/netfilter/nft_set_hash.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/workqueue.h>
+#include <linux/rhashtable.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_HASH_ELEMENT_HINT 3
+
+struct nft_hash {
+ struct rhashtable ht;
+ struct delayed_work gc_work;
+};
+
+struct nft_hash_elem {
+ struct rhash_head node;
+ struct nft_set_ext ext;
+};
+
+struct nft_hash_cmp_arg {
+ const struct nft_set *set;
+ const u32 *key;
+ u8 genmask;
+};
+
+static const struct rhashtable_params nft_hash_params;
+
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_cmp_arg *arg = data;
+
+ return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_elem *he = data;
+
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_hash_cmp_arg *x = arg->key;
+ const struct nft_hash_elem *he = ptr;
+
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+ return 1;
+ return 0;
+}
+
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ *ext = &he->ext;
+
+ return !!he;
+}
+
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ goto out;
+
+ he = new(set, expr, regs);
+ if (he == NULL)
+ goto err1;
+ if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params))
+ goto err2;
+out:
+ *ext = &he->ext;
+ return true;
+
+err2:
+ nft_set_elem_destroy(set, he);
+err1:
+ return false;
+}
+
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+ struct nft_hash_elem *prev;
+
+ prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
+ if (IS_ERR(prev))
+ return PTR_ERR(prev);
+ if (prev) {
+ *ext = &prev->ext;
+ return -EEXIST;
+ }
+ return 0;
+}
+
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash_elem *he = elem->priv;
+
+ nft_set_elem_change_active(net, set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
+}
+
+static void *nft_hash_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ rcu_read_lock();
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL) {
+ if (!nft_set_elem_mark_busy(&he->ext) ||
+ !nft_is_active(net, &he->ext))
+ nft_set_elem_change_active(net, set, &he->ext);
+ else
+ he = NULL;
+ }
+ rcu_read_unlock();
+
+ return he;
+}
+
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct rhashtable_iter hti;
+ struct nft_set_elem elem;
+ int err;
+
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+ iter->err = err;
+ if (err)
+ return;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ err = PTR_ERR(he);
+ if (err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+
+ continue;
+ }
+
+ if (iter->count < iter->skip)
+ goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = he;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ goto out;
+
+cont:
+ iter->count++;
+ }
+
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+}
+
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_hash);
+}
+
+static const struct rhashtable_params nft_hash_params = {
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .hashfn = nft_hash_key,
+ .obj_hashfn = nft_hash_obj,
+ .obj_cmpfn = nft_hash_cmp,
+ .automatic_shrinking = true,
+};
+
+static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const tb[])
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct rhashtable_params params = nft_hash_params;
+ int err;
+
+ params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+ params.key_len = set->klen;
+
+ err = rhashtable_init(&priv->ht, &params);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ return 0;
+}
+
+static void nft_hash_elem_destroy(void *ptr, void *arg)
+{
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+
+ cancel_delayed_work_sync(&priv->gc_work);
+ rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+ (void *)set);
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int esize;
+
+ esize = sizeof(struct nft_hash_elem);
+ if (desc->size) {
+ est->size = sizeof(struct nft_hash) +
+ roundup_pow_of_two(desc->size * 4 / 3) *
+ sizeof(struct nft_hash_elem *) +
+ desc->size * esize;
+ } else {
+ /* Resizing happens when the load drops below 30% or goes
+ * above 75%. The average of 52.5% load (approximated by 50%)
+ * is used for the size estimation of the hash buckets,
+ * meaning we calculate two buckets per element.
+ */
+ est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+ }
+
+ est->class = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+ .privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
+ .estimate = nft_hash_estimate,
+ .init = nft_hash_init,
+ .destroy = nft_hash_destroy,
+ .insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
+ .remove = nft_hash_remove,
+ .lookup = nft_hash_lookup,
+ .update = nft_hash_update,
+ .walk = nft_hash_walk,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+ return nft_register_set(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+ nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c
index ffe9ae062d23..38b5bda242f8 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -96,7 +96,8 @@ out:
}
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
- struct nft_rbtree_elem *new)
+ struct nft_rbtree_elem *new,
+ struct nft_set_ext **ext)
{
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
@@ -124,8 +125,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
else if (!nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_end(new))
p = &parent->rb_right;
- else
+ else {
+ *ext = &rbe->ext;
return -EEXIST;
+ }
}
}
}
@@ -135,13 +138,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
{
struct nft_rbtree_elem *rbe = elem->priv;
int err;
spin_lock_bh(&nft_rbtree_lock);
- err = __nft_rbtree_insert(net, set, rbe);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
spin_unlock_bh(&nft_rbtree_lock);
return err;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 188404b9b002..a3b8f697cfc5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
return false;
if (info->match_flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires = 0;
+ unsigned long expires = nf_ct_expires(ct) / HZ;
- if (timer_pending(&ct->timeout))
- expires = (ct->timeout.expires - jiffies) / HZ;
if ((expires >= info->expires_min &&
expires <= info->expires_max) ^
!(info->invert_flags & XT_CONNTRACK_EXPIRES))
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index e5f18988aee0..bb33598e4530 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
- pr_info("using --physdev-out and --physdev-is-out are only"
- "supported in the FORWARD and POSTROUTING chains with"
+ pr_info("using --physdev-out and --physdev-is-out are only "
+ "supported in the FORWARD and POSTROUTING chains with "
"bridged traffic.\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e054a748ff25..31045ef44a82 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1367,7 +1367,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
if (ct_info->helper)
module_put(ct_info->helper->me);
if (ct_info->ct)
- nf_ct_put(ct_info->ct);
+ nf_ct_tmpl_free(ct_info->ct);
}
void ovs_ct_init(struct net *net)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index ae469b37d852..753f774cb46f 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -269,18 +269,19 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked)
*
* RCU is locked, no other locks set
*/
-void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr)
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr)
{
struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
struct sk_buff_head xmitq;
+ int rc = 0;
__skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
if (msg_type(hdr) == STATE_MSG) {
tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
- tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+ rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
} else {
tipc_link_bc_init_rcv(l, hdr);
}
@@ -291,6 +292,7 @@ void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
/* Any socket wakeup messages ? */
if (!skb_queue_empty(inputq))
tipc_sk_rcv(net, inputq);
+ return rc;
}
/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index d5e79b3767fd..5ffe34472ccd 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -56,8 +56,8 @@ int tipc_bcast_get_mtu(struct net *net);
int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
-void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr);
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr);
int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
int tipc_bclink_reset_stats(struct net *net);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2c6e1b9e024b..b36e16cdc945 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -181,7 +181,10 @@ struct tipc_link {
u16 acked;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
- int nack_state;
+ unsigned long prev_retr;
+ u16 prev_from;
+ u16 prev_to;
+ u8 nack_state;
bool bc_peer_is_up;
/* Statistics */
@@ -202,6 +205,8 @@ enum {
BC_NACK_SND_SUPPRESS,
};
+#define TIPC_BC_RETR_LIMIT 10 /* [ms] */
+
/*
* Interval between NACKs when packets arrive out of order
*/
@@ -237,8 +242,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq);
static void link_print(struct tipc_link *l, const char *str);
-static void tipc_link_build_nack_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq);
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
@@ -367,6 +372,18 @@ int tipc_link_bc_peers(struct tipc_link *l)
return l->ackers;
}
+u16 link_bc_rcv_gap(struct tipc_link *l)
+{
+ struct sk_buff *skb = skb_peek(&l->deferdq);
+ u16 gap = 0;
+
+ if (more(l->snd_nxt, l->rcv_nxt))
+ gap = l->snd_nxt - l->rcv_nxt;
+ if (skb)
+ gap = buf_seqno(skb) - l->rcv_nxt;
+ return gap;
+}
+
void tipc_link_set_mtu(struct tipc_link *l, int mtu)
{
l->mtu = mtu;
@@ -1135,7 +1152,10 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
return 0;
l->rcv_unacked = 0;
- return TIPC_LINK_SND_BC_ACK;
+
+ /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */
+ l->snd_nxt = l->rcv_nxt;
+ return TIPC_LINK_SND_STATE;
}
/* Unicast ACK */
@@ -1164,17 +1184,26 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
}
/* tipc_link_build_nack_msg: prepare link nack message for transmission
+ * Note that sending of broadcast NACK is coordinated among nodes, to
+ * reduce the risk of NACK storms towards the sender
*/
-static void tipc_link_build_nack_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq)
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
{
u32 def_cnt = ++l->stats.deferred_recv;
+ int match1, match2;
- if (link_is_bc_rcvlink(l))
- return;
+ if (link_is_bc_rcvlink(l)) {
+ match1 = def_cnt & 0xf;
+ match2 = tipc_own_addr(l->net) & 0xf;
+ if (match1 == match2)
+ return TIPC_LINK_SND_STATE;
+ return 0;
+ }
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ return 0;
}
/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
@@ -1225,7 +1254,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Defer delivery if sequence gap */
if (unlikely(seqno != rcv_nxt)) {
__tipc_skb_queue_sorted(defq, seqno, skb);
- tipc_link_build_nack_msg(l, xmitq);
+ rc |= tipc_link_build_nack_msg(l, xmitq);
break;
}
@@ -1236,7 +1265,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
rc |= tipc_link_input(l, skb, l->inputq);
if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
rc |= tipc_link_build_state_msg(l, xmitq);
- if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK))
+ if (unlikely(rc & ~TIPC_LINK_SND_STATE))
break;
} while ((skb = __skb_dequeue(defq)));
@@ -1250,10 +1279,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq)
{
+ struct tipc_link *bcl = l->bc_rcvlink;
struct sk_buff *skb;
struct tipc_msg *hdr;
struct sk_buff_head *dfq = &l->deferdq;
- bool node_up = link_is_up(l->bc_rcvlink);
+ bool node_up = link_is_up(bcl);
struct tipc_mon_state *mstate = &l->mon_state;
int dlen = 0;
void *data;
@@ -1281,7 +1311,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_net_plane(hdr, l->net_plane);
msg_set_next_sent(hdr, l->snd_nxt);
msg_set_ack(hdr, l->rcv_nxt - 1);
- msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1);
+ msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
msg_set_link_tolerance(hdr, tolerance);
msg_set_linkprio(hdr, priority);
@@ -1291,6 +1321,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
if (mtyp == STATE_MSG) {
msg_set_seq_gap(hdr, rcvgap);
+ msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
msg_set_probe(hdr, probe);
tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + dlen);
@@ -1573,51 +1604,107 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
l->rcv_nxt = peers_snd_nxt;
}
+/* link_bc_retr eval()- check if the indicated range can be retransmitted now
+ * - Adjust permitted range if there is overlap with previous retransmission
+ */
+static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
+{
+ unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);
+
+ if (less(*to, *from))
+ return false;
+
+ /* New retransmission request */
+ if ((elapsed > TIPC_BC_RETR_LIMIT) ||
+ less(*to, l->prev_from) || more(*from, l->prev_to)) {
+ l->prev_from = *from;
+ l->prev_to = *to;
+ l->prev_retr = jiffies;
+ return true;
+ }
+
+ /* Inside range of previous retransmit */
+ if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
+ return false;
+
+ /* Fully or partially outside previous range => exclude overlap */
+ if (less(*from, l->prev_from)) {
+ *to = l->prev_from - 1;
+ l->prev_from = *from;
+ }
+ if (more(*to, l->prev_to)) {
+ *from = l->prev_to + 1;
+ l->prev_to = *to;
+ }
+ l->prev_retr = jiffies;
+ return true;
+}
+
/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
*/
-void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
- struct sk_buff_head *xmitq)
+int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+ struct sk_buff_head *xmitq)
{
+ struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
+ u16 from = msg_bcast_ack(hdr) + 1;
+ u16 to = from + msg_bc_gap(hdr) - 1;
+ int rc = 0;
if (!link_is_up(l))
- return;
+ return rc;
if (!msg_peer_node_is_up(hdr))
- return;
+ return rc;
/* Open when peer ackowledges our bcast init msg (pkt #1) */
if (msg_ack(hdr))
l->bc_peer_is_up = true;
if (!l->bc_peer_is_up)
- return;
+ return rc;
+
+ l->stats.recv_nacks++;
/* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
- return;
+ return rc;
+
+ if (link_bc_retr_eval(snd_l, &from, &to))
+ rc = tipc_link_retrans(snd_l, from, to, xmitq);
+
+ l->snd_nxt = peers_snd_nxt;
+ if (link_bc_rcv_gap(l))
+ rc |= TIPC_LINK_SND_STATE;
+
+ /* Return now if sender supports nack via STATE messages */
+ if (l->peer_caps & TIPC_BCAST_STATE_NACK)
+ return rc;
+
+ /* Otherwise, be backwards compatible */
if (!more(peers_snd_nxt, l->rcv_nxt)) {
l->nack_state = BC_NACK_SND_CONDITIONAL;
- return;
+ return 0;
}
/* Don't NACK if one was recently sent or peeked */
if (l->nack_state == BC_NACK_SND_SUPPRESS) {
l->nack_state = BC_NACK_SND_UNCONDITIONAL;
- return;
+ return 0;
}
/* Conditionally delay NACK sending until next synch rcv */
if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
l->nack_state = BC_NACK_SND_UNCONDITIONAL;
if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
- return;
+ return 0;
}
/* Send NACK now but suppress next one */
tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
l->nack_state = BC_NACK_SND_SUPPRESS;
+ return 0;
}
void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
@@ -1654,6 +1741,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
}
/* tipc_link_bc_nack_rcv(): receive broadcast nack message
+ * This function is here for backwards compatibility, since
+ * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5.
*/
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq)
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d7e9d42fcb2d..d1bd1787a768 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -63,7 +63,7 @@ enum {
enum {
TIPC_LINK_UP_EVT = 1,
TIPC_LINK_DOWN_EVT = (1 << 1),
- TIPC_LINK_SND_BC_ACK = (1 << 2)
+ TIPC_LINK_SND_STATE = (1 << 2)
};
/* Starting value for maximum packet size negotiation on unicast links
@@ -138,8 +138,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
void tipc_link_build_bc_sync_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
-void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
- struct sk_buff_head *xmitq);
+int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+ struct sk_buff_head *xmitq);
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
#endif
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 7cf52fb39bee..c3832cdf2278 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -719,6 +719,16 @@ static inline char *msg_media_addr(struct tipc_msg *m)
return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
}
+static inline u32 msg_bc_gap(struct tipc_msg *m)
+{
+ return msg_bits(m, 8, 0, 0x3ff);
+}
+
+static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 8, 0, 0x3ff, n);
+}
+
/*
* Word 9
*/
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7e8b75fd1a02..7ef14e2d2356 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1262,6 +1262,34 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
kfree_skb(skb);
}
+static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
+ int bearer_id, struct sk_buff_head *xmitq)
+{
+ struct tipc_link *ucl;
+ int rc;
+
+ rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+
+ if (rc & TIPC_LINK_DOWN_EVT) {
+ tipc_bearer_reset_all(n->net);
+ return;
+ }
+
+ if (!(rc & TIPC_LINK_SND_STATE))
+ return;
+
+ /* If probe message, a STATE response will be sent anyway */
+ if (msg_probe(hdr))
+ return;
+
+ /* Produce a STATE message carrying broadcast NACK */
+ tipc_node_read_lock(n);
+ ucl = n->links[bearer_id].link;
+ if (ucl)
+ tipc_link_build_state_msg(ucl, xmitq);
+ tipc_node_read_unlock(n);
+}
+
/**
* tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
* @net: the applicable net namespace
@@ -1298,7 +1326,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
rc = tipc_bcast_rcv(net, be->link, skb);
/* Broadcast ACKs are sent on a unicast link */
- if (rc & TIPC_LINK_SND_BC_ACK) {
+ if (rc & TIPC_LINK_SND_STATE) {
tipc_node_read_lock(n);
tipc_link_build_state_msg(le->link, &xmitq);
tipc_node_read_unlock(n);
@@ -1505,7 +1533,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
/* Ensure broadcast reception is in synch with peer's send state */
if (unlikely(usr == LINK_PROTOCOL))
- tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
+ tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 4578b34c7dca..39ef54c1f2ad 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -1,7 +1,7 @@
/*
* net/tipc/node.h: Include file for TIPC node management routines
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2016, Ericsson AB
* Copyright (c) 2005, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -45,11 +45,14 @@
/* Optional capabilities supported by this code version
*/
enum {
- TIPC_BCAST_SYNCH = (1 << 1),
- TIPC_BLOCK_FLOWCTL = (2 << 1)
+ TIPC_BCAST_SYNCH = (1 << 1),
+ TIPC_BCAST_STATE_NACK = (1 << 2),
+ TIPC_BLOCK_FLOWCTL = (1 << 3)
};
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL)
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
+ TIPC_BCAST_STATE_NACK | \
+ TIPC_BLOCK_FLOWCTL)
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);