From e4a6a3424b75f23f6bb1cc479974fc305a4b9f78 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 13 Jul 2017 14:27:58 +0800 Subject: bpf: fix return in bpf_skb_adjust_net The bpf_skb_adjust_net() ignores the return value of bpf_skb_net_shrink/grow, and always return 0, fix it by return 'ret'. Signed-off-by: Kefeng Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index c7f737058d89..f44fc22fd45a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2248,7 +2248,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) bpf_skb_net_grow(skb, len_diff_abs); bpf_compute_data_end(skb); - return 0; + return ret; } BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, -- cgit v1.2.3 From 0db01097cabd97897d123b4c5d805d1a7b061d82 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 13 Jul 2017 10:57:40 +0200 Subject: xgene: Don't fail probe, if there is no clk resource for SGMII interfaces This change fixes following problem [ 1.827940] xgene-enet: probe of 1f210030.ethernet failed with error -2 which leads to a missing ethernet interface (reproducable at least on Gigabyte MP30-AR0 and APM Mustang systems). The check for a valid clk resource fails, because DT doesn't provide a clock for sgenet1. But the driver doesn't use this clk, if the ethernet port is connected via SGMII. Therefore this patch avoids probing for clk on SGMII interfaces. Fixes: 9aea7779b764 ("drivers: net: xgene: Fix crash on DT systems") Signed-off-by: Thomas Bogendoerfer Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d3906f6b01bd..86058a9f3417 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1785,16 +1785,18 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) xgene_enet_gpiod_get(pdata); - pdata->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pdata->clk)) { - /* Abort if the clock is defined but couldn't be retrived. - * Always abort if the clock is missing on DT system as - * the driver can't cope with this case. - */ - if (PTR_ERR(pdata->clk) != -ENOENT || dev->of_node) - return PTR_ERR(pdata->clk); - /* Firmware may have set up the clock already. */ - dev_info(dev, "clocks have been setup already\n"); + if (pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) { + pdata->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pdata->clk)) { + /* Abort if the clock is defined but couldn't be + * retrived. Always abort if the clock is missing on + * DT system as the driver can't cope with this case. + */ + if (PTR_ERR(pdata->clk) != -ENOENT || dev->of_node) + return PTR_ERR(pdata->clk); + /* Firmware may have set up the clock already. */ + dev_info(dev, "clocks have been setup already\n"); + } } if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) -- cgit v1.2.3 From 22c608919bd4f0fae8e70e1284c83e0f1d507f34 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Jul 2017 11:52:23 +0100 Subject: net: broadcom: bnx2x: make a couple of const arrays static Don't populate various tables on the stack but make them static const. Makes the object code smaller by nearly 200 bytes: Before: text data bss dec hex filename 113468 11200 0 124668 1e6fc bnx2x_ethtool.o After: text data bss dec hex filename 113129 11344 0 124473 1e639 bnx2x_ethtool.o Signed-off-by: Colin Ian King Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 43423744fdfa..21bc4bed6b26 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2886,7 +2886,7 @@ static int bnx2x_test_nvram_tbl(struct bnx2x *bp, static int bnx2x_test_nvram(struct bnx2x *bp) { - const struct crc_pair nvram_tbl[] = { + static const struct crc_pair nvram_tbl[] = { { 0, 0x14 }, /* bootstrap */ { 0x14, 0xec }, /* dir */ { 0x100, 0x350 }, /* manuf_info */ @@ -2895,7 +2895,7 @@ static int bnx2x_test_nvram(struct bnx2x *bp) { 0x708, 0x70 }, /* manuf_key_info */ { 0, 0 } }; - const struct crc_pair nvram_tbl2[] = { + static const struct crc_pair nvram_tbl2[] = { { 0x7e8, 0x350 }, /* manuf_info2 */ { 0xb38, 0xf0 }, /* feature_info */ { 0, 0 } -- cgit v1.2.3 From f56ff774871d95f60c3f858419cf7424e6c361cf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Jul 2017 12:18:48 +0100 Subject: net: stmmac: make const array route_possibilities static Don't populate array route_possibilities on the stack but make it static const. Makes the object code a little smaller by 85 bytes: Before: text data bss dec hex filename 9901 2448 0 12349 303d dwmac4_core.o After: text data bss dec hex filename 9760 2504 0 12264 2fe8 dwmac4_core.o Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f233bf8b4ebb..c4407e8e39a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -117,7 +117,7 @@ static void dwmac4_tx_queue_routing(struct mac_device_info *hw, void __iomem *ioaddr = hw->pcsr; u32 value; - const struct stmmac_rx_routing route_possibilities[] = { + static const struct stmmac_rx_routing route_possibilities[] = { { GMAC_RXQCTRL_AVCPQ_MASK, GMAC_RXQCTRL_AVCPQ_SHIFT }, { GMAC_RXQCTRL_PTPQ_MASK, GMAC_RXQCTRL_PTPQ_SHIFT }, { GMAC_RXQCTRL_DCBCPQ_MASK, GMAC_RXQCTRL_DCBCPQ_SHIFT }, -- cgit v1.2.3 From bf98bd0be1ba509c5e6d77524ffac192f1edb2dd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Jul 2017 12:47:33 +0100 Subject: rt2x00: make const array glrt_table static Don't populate array glrt_table on the stack but make it static. Makes the object code a smaller by over 670 bytes: Before: text data bss dec hex filename 131772 4733 0 136505 21539 rt2800lib.o After: text data bss dec hex filename 131043 4789 0 135832 21298 rt2800lib.o Signed-off-by: Colin Ian King Acked-by: Stanislaw Gruszka Signed-off-by: David S. Miller --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 6e2e760d98b1..0b75def39c6c 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -5704,7 +5704,7 @@ static void rt2800_init_freq_calibration(struct rt2x00_dev *rt2x00dev) static void rt2800_init_bbp_5592_glrt(struct rt2x00_dev *rt2x00dev) { - const u8 glrt_table[] = { + static const u8 glrt_table[] = { 0xE0, 0x1F, 0X38, 0x32, 0x08, 0x28, 0x19, 0x0A, 0xFF, 0x00, /* 128 ~ 137 */ 0x16, 0x10, 0x10, 0x0B, 0x36, 0x2C, 0x26, 0x24, 0x42, 0x36, /* 138 ~ 147 */ 0x30, 0x2D, 0x4C, 0x46, 0x3D, 0x40, 0x3E, 0x42, 0x3D, 0x40, /* 148 ~ 157 */ -- cgit v1.2.3 From 5e349fc03cdd44c670aaf3affaa6f0426c5e553f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 13 Jul 2017 12:22:24 +0100 Subject: dccp: make const array error_code static Don't populate array error_code on the stack but make it static. Makes the object code smaller by almost 250 bytes: Before: text data bss dec hex filename 10366 983 0 11349 2c55 net/dccp/input.o After: text data bss dec hex filename 10161 1039 0 11200 2bc0 net/dccp/input.o Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/dccp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 4a05d7876850..fa6be9750bb4 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -126,7 +126,7 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) static u16 dccp_reset_code_convert(const u8 code) { - const u16 error_code[] = { + static const u16 error_code[] = { [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ [DCCP_RESET_CODE_ABORTED] = ECONNRESET, -- cgit v1.2.3 From 803d5b6ebfb06a0d2ee3699fea4f1c7593958566 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 13 Jul 2017 18:45:07 +0530 Subject: cxgb4: add new T5 pci device id's Add 0x50a3 and 0x50a4 T5 device id's Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 99987d8e437e..aa28299aef5f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -174,6 +174,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x50a0), /* Custom T540-CR */ CH_PCI_ID_TABLE_FENTRY(0x50a1), /* Custom T540-CR */ CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */ + CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */ + CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */ /* T6 adapters: */ -- cgit v1.2.3 From 2683701201ddb9a2a4d0fda5d950ab50ca8e77e4 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Thu, 13 Jul 2017 16:46:43 +0200 Subject: netlink: correctly document nla_put_u64_64bit() Signed-off-by: Rolf Eike Beer Signed-off-by: David S. Miller --- include/net/netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 01709172b3d3..ef8e6c3a80a6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -98,8 +98,8 @@ * nla_put_u8(skb, type, value) add u8 attribute to skb * nla_put_u16(skb, type, value) add u16 attribute to skb * nla_put_u32(skb, type, value) add u32 attribute to skb - * nla_put_u64_64bits(skb, type, - * value, padattr) add u64 attribute to skb + * nla_put_u64_64bit(skb, type, + * value, padattr) add u64 attribute to skb * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb -- cgit v1.2.3 From 5d89fb33223e0be32e4100623b915048b02beeec Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Jul 2017 13:36:40 -0700 Subject: net: set fib rule refcount after malloc The configure callback of fib_rules_ops can change the refcnt of a fib rule. For instance, mlxsw takes a refcnt when adding the processing of the rule to a work queue. Thus the rule refcnt can not be reset to to 1 afterwards. Move the refcnt setting to after the allocation. Fixes: 5361e209dd30 ("net: avoid one splat in fib_nl_delrule()") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/fib_rules.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a0093e1b0235..fdcb1bcd2afa 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -400,6 +400,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, err = -ENOMEM; goto errout; } + refcount_set(&rule->refcnt, 1); rule->fr_net = net; rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY]) @@ -517,8 +518,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, last = r; } - refcount_set(&rule->refcnt, 1); - if (last) list_add_rcu(&rule->list, &last->list); else -- cgit v1.2.3 From 457839ed3e78618cf0354cf79a1f47fe6eb26aef Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 10 Jul 2017 14:35:23 +0200 Subject: mdio: mux: fix parsing mux registers outside of the PHY address range mdio_mux_init parses the child nodes of the MDIO mux. When using "mdio-mux-mmioreg" the child nodes are describing the register value that is written to switch between the MDIO busses. The change which makes the error messages more verbose changed the parsing of the "reg" property from a simple of_property_read_u32 call to of_mdio_parse_addr. On a Khadas VIM (based on the Meson GXL SoC, which uses mdio-mux-mmioreg) this prevents registering the MDIO mux (because the "reg" values on the MDIO mux child nodes are 0x2009087f and 0xe40908ff) and leads to the following errors: mdio-mux-mmioreg c883455c.eth-phy-mux: /soc/periphs@c8834000/eth-phy-mux/mdio@e40908ff PHY address -469169921 is too large mdio-mux-mmioreg c883455c.eth-phy-mux: Error: Failed to find reg for child /soc/periphs@c8834000/eth-phy-mux/mdio@e40908ff mdio-mux-mmioreg c883455c.eth-phy-mux: /soc/periphs@c8834000/eth-phy-mux/mdio@2009087f PHY address 537462911 is too large mdio-mux-mmioreg c883455c.eth-phy-mux: Error: Failed to find reg for child /soc/periphs@c8834000/eth-phy-mux/mdio@2009087f mdio-mux-mmioreg c883455c.eth-phy-mux: Error: No acceptable child buses found mdio-mux-mmioreg c883455c.eth-phy-mux: failed to register mdio-mux bus /soc/periphs@c8834000/eth-phy-mux (as a result of that ethernet is not working, because the PHY which is connected through the mux' child MDIO bus, which is not being registered). Fix this by reverting the change from of_mdio_parse_addr to of_mdio_parse_addr. Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose") Signed-off-by: Martin Blumenstingl Acked-by: Neil Armstrong Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 00755b6a42cf..c608e1dfaf09 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -135,8 +135,8 @@ int mdio_mux_init(struct device *dev, for_each_available_child_of_node(dev->of_node, child_bus_node) { int v; - v = of_mdio_parse_addr(dev, child_bus_node); - if (v < 0) { + r = of_property_read_u32(child_bus_node, "reg", &v); + if (r) { dev_err(dev, "Error: Failed to find reg for child %s\n", of_node_full_name(child_bus_node)); -- cgit v1.2.3 From 2b02c20ce0c28974b44e69a2e2f5ddc6a470ad6f Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Tue, 11 Jul 2017 17:21:52 +0200 Subject: cdc_ncm: Set NTB format again after altsetting switch for Huawei devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some firmwares in Huawei E3372H devices have been observed to switch back to NTB 32-bit format after altsetting switch. This patch implements a driver flag to check for the device settings and set NTB format to 16-bit again if needed. The flag has been activated for devices controlled by the huawei_cdc_ncm.c driver. V1->V2: - fixed broken error checks - some corrections to the commit message V2->V3: - variable name changes, to clarify what's happening - check (and possibly set) the NTB format later in the common bind code path Signed-off-by: Enrico Mioso Reported-and-tested-by: Christian Panton Reviewed-by: Bjørn Mork CC: Bjørn Mork CC: Christian Panton CC: linux-usb@vger.kernel.org CC: netdev@vger.kernel.org CC: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 28 ++++++++++++++++++++++++++++ drivers/net/usb/huawei_cdc_ncm.c | 6 ++++++ include/linux/usb/cdc_ncm.h | 1 + 3 files changed, 35 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index d103a1d4fb36..8f572b9f3625 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -768,8 +768,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ u8 *buf; int len; int temp; + int err; u8 iface_no; struct usb_cdc_parsed_header hdr; + u16 curr_ntb_format; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -874,6 +876,32 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ goto error2; } + /* + * Some Huawei devices have been observed to come out of reset in NDP32 mode. + * Let's check if this is the case, and set the device to NDP16 mode again if + * needed. + */ + if (ctx->drvflags & CDC_NCM_FLAG_RESET_NTB16) { + err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_FORMAT, + USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, + 0, iface_no, &curr_ntb_format, 2); + if (err < 0) { + goto error2; + } + + if (curr_ntb_format == USB_CDC_NCM_NTB32_FORMAT) { + dev_info(&intf->dev, "resetting NTB format to 16-bit"); + err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + USB_CDC_NCM_NTB16_FORMAT, + iface_no, NULL, 0); + + if (err < 0) + goto error2; + } + } + cdc_ncm_find_endpoints(dev, ctx->data); cdc_ncm_find_endpoints(dev, ctx->control); if (!dev->in || !dev->out || !dev->status) { diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 2680a65cd5e4..63f28908afda 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -80,6 +80,12 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev, * be at the end of the frame. */ drvflags |= CDC_NCM_FLAG_NDP_TO_END; + + /* Additionally, it has been reported that some Huawei E3372H devices, with + * firmware version 21.318.01.00.541, come out of reset in NTB32 format mode, hence + * needing to be set to the NTB16 one again. + */ + drvflags |= CDC_NCM_FLAG_RESET_NTB16; ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags); if (ret) goto err; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 021f7a88f52c..1a59699cf82a 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -83,6 +83,7 @@ /* Driver flags */ #define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE 0x04 /* Avoid altsetting toggle during init */ +#define CDC_NCM_FLAG_RESET_NTB16 0x08 /* set NDP16 one more time after altsetting switch */ #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) -- cgit v1.2.3 From 230cd1279d0019d52f9529c7d91c96d095cae755 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 12 Jul 2017 15:56:41 -0700 Subject: netpoll: shut up a kernel warning on refcount When we convert atomic_t to refcount_t, a new kernel warning on "increment on 0" is introduced in the netpoll code, zap_completion_queue(). In fact for this special case, we know the refcount is 0 and we just have to set it to 1 to satisfy the following dev_kfree_skb_any(), so we can just use refcount_set(..., 1) instead. Fixes: 633547973ffc ("net: convert sk_buff.users from atomic_t to refcount_t") Reported-by: Dave Jones Cc: Reshetova, Elena Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d3408a693166..8357f164c660 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -277,7 +277,7 @@ static void zap_completion_queue(void) struct sk_buff *skb = clist; clist = clist->next; if (!skb_irq_freeable(skb)) { - refcount_inc(&skb->users); + refcount_set(&skb->users, 1); dev_kfree_skb_any(skb); /* put this one back */ } else { __kfree_skb(skb); -- cgit v1.2.3 From 1e4babee70a2e2d8f9e0da06f013563b0e26f654 Mon Sep 17 00:00:00 2001 From: LiuJian Date: Thu, 13 Jul 2017 18:57:54 +0800 Subject: net: hns: add acpi function of xge led control The current code only support DT method to control xge led. This patch is the implementation of acpi method to control xge led. Signed-off-by: LiuJian Reviewed-by: John Garry Reviewed-by: Yunsheng Lin Reviewed-by: Daode Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 3 +- drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 58 +++++++++++++++++++++- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index ff864a187d5a..a37166ee577b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -776,8 +776,9 @@ void hns_ae_update_led_status(struct hnae_handle *handle) assert(handle); mac_cb = hns_get_mac_cb(handle); - if (!mac_cb->cpld_ctrl) + if (mac_cb->media_type != HNAE_MEDIA_TYPE_FIBER) return; + hns_set_led_opt(mac_cb); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 7a8addda726e..408b63faf9a8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -53,6 +53,34 @@ static u32 dsaf_read_sub(struct dsaf_device *dsaf_dev, u32 reg) return ret; } +static void hns_dsaf_acpi_ledctrl_by_port(struct hns_mac_cb *mac_cb, u8 op_type, + u32 link, u32 port, u32 act) +{ + union acpi_object *obj; + union acpi_object obj_args[3], argv4; + + obj_args[0].integer.type = ACPI_TYPE_INTEGER; + obj_args[0].integer.value = link; + obj_args[1].integer.type = ACPI_TYPE_INTEGER; + obj_args[1].integer.value = port; + obj_args[2].integer.type = ACPI_TYPE_INTEGER; + obj_args[2].integer.value = act; + + argv4.type = ACPI_TYPE_PACKAGE; + argv4.package.count = 3; + argv4.package.elements = obj_args; + + obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev), + &hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4); + if (!obj) { + dev_warn(mac_cb->dev, "ledctrl fail, link:%d port:%d act:%d!\n", + link, port, act); + return; + } + + ACPI_FREE(obj); +} + static void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status, u16 speed, int data) { @@ -93,6 +121,18 @@ static void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status, } } +static void hns_cpld_set_led_acpi(struct hns_mac_cb *mac_cb, int link_status, + u16 speed, int data) +{ + if (!mac_cb) { + pr_err("cpld_led_set mac_cb is null!\n"); + return; + } + + hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC, + link_status, mac_cb->mac_id, data); +} + static void cpld_led_reset(struct hns_mac_cb *mac_cb) { if (!mac_cb || !mac_cb->cpld_ctrl) @@ -103,6 +143,20 @@ static void cpld_led_reset(struct hns_mac_cb *mac_cb) mac_cb->cpld_led_value = CPLD_LED_DEFAULT_VALUE; } +static void cpld_led_reset_acpi(struct hns_mac_cb *mac_cb) +{ + if (!mac_cb) { + pr_err("cpld_led_reset mac_cb is null!\n"); + return; + } + + if (mac_cb->media_type != HNAE_MEDIA_TYPE_FIBER) + return; + + hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC, + 0, mac_cb->mac_id, 0); +} + static int cpld_set_led_id(struct hns_mac_cb *mac_cb, enum hnae_led_state status) { @@ -604,8 +658,8 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback; } else if (is_acpi_node(dsaf_dev->dev->fwnode)) { - misc_op->cpld_set_led = hns_cpld_set_led; - misc_op->cpld_reset_led = cpld_led_reset; + misc_op->cpld_set_led = hns_cpld_set_led_acpi; + misc_op->cpld_reset_led = cpld_led_reset_acpi; misc_op->cpld_set_led_id = cpld_set_led_id; misc_op->dsaf_reset = hns_dsaf_rst_acpi; -- cgit v1.2.3 From 40fbbce007cb458da4c15cbf53beacf6b755cedc Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 13 Jul 2017 18:36:50 +0530 Subject: cxgb4: ptp_clock_register() returns error pointers Check ptp_clock_register() return not only for NULL but also for error pointers, and also nullify adapter->ptp_clock if ptp_clock_register() fails. Fixes: 9c33e4208bce ("cxgb4: Add PTP Hardware Clock (PHC) support") Reported-by: Dan Carpenter Cc: Richard Cochran Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index 50517cfd9671..9f9d6cae39d5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -441,7 +441,8 @@ void cxgb4_ptp_init(struct adapter *adapter) adapter->ptp_clock = ptp_clock_register(&adapter->ptp_clock_info, &adapter->pdev->dev); - if (!adapter->ptp_clock) { + if (IS_ERR_OR_NULL(adapter->ptp_clock)) { + adapter->ptp_clock = NULL; dev_err(adapter->pdev_dev, "PTP %s Clock registration has failed\n", __func__); return; -- cgit v1.2.3 From 31a4562d7408493c6377933ff2f7d7302dbdea80 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 13 Jul 2017 16:09:10 +0300 Subject: net: bridge: fix dest lookup when vlan proto doesn't match With 802.1ad support the vlan_ingress code started checking for vlan protocol mismatch which causes the current tag to be inserted and the bridge vlan protocol & pvid to be set. The vlan tag insertion changes the skb mac_header and thus the lookup mac dest pointer which was loaded prior to calling br_allowed_ingress in br_handle_frame_finish is VLAN_HLEN bytes off now, pointing to the last two bytes of the destination mac and the first four of the source mac causing lookups to always fail and broadcasting all such packets to all ports. Same thing happens for locally originated packets when passing via br_dev_xmit. So load the dest pointer after the vlan checks and possible skb change. Fixes: 8580e2117c06 ("bridge: Prepare for 802.1ad vlan filtering support") Reported-by: Anitha Narasimha Murthy Signed-off-by: Nikolay Aleksandrov Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_device.c | 3 ++- net/bridge/br_input.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f0f3447e8aa4..861ae2a165f4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -34,11 +34,11 @@ static struct lock_class_key bridge_netdev_addr_lock_key; netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); const struct nf_br_ops *nf_ops; + const unsigned char *dest; u16 vid = 0; rcu_read_lock(); @@ -61,6 +61,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; + dest = eth_hdr(skb)->h_dest; if (is_broadcast_ether_addr(dest)) { br_flood(br, skb, BR_PKT_BROADCAST, false, true); } else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 013f2290bfa5..7637f58c1226 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -131,11 +131,11 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); - const unsigned char *dest = eth_hdr(skb)->h_dest; enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL; struct net_bridge_mdb_entry *mdst; bool local_rcv, mcast_hit = false; + const unsigned char *dest; struct net_bridge *br; u16 vid = 0; @@ -153,6 +153,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); local_rcv = !!(br->dev->flags & IFF_PROMISC); + dest = eth_hdr(skb)->h_dest; if (is_multicast_ether_addr(dest)) { /* by definition the broadcast is also a multicast address */ if (is_broadcast_ether_addr(dest)) { -- cgit v1.2.3 From ccd4eb49f3392ebf989d58bd013a7bf44cdca4d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Briano?= Date: Thu, 13 Jul 2017 09:46:58 -0700 Subject: net/packet: Fix Tx queue selection for AF_PACKET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When PACKET_QDISC_BYPASS is not used, Tx queue selection will be done before the packet is enqueued, taking into account any mappings set by a queuing discipline such as mqprio without hardware offloading. This selection may be affected by a previously saved queue_mapping, either on the Rx path, or done before the packet reaches the device, as it's currently the case for AF_PACKET. In order for queue selection to work as expected when using traffic control, there can't be another selection done before that point is reached, so move the call to packet_pick_tx_queue to packet_direct_xmit, leaving the default xmit path as it was before PACKET_QDISC_BYPASS was introduced. A forward declaration of packet_pick_tx_queue() is introduced to avoid the need to reorder the functions within the file. Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Iván Briano Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e3beb28203eb..008bb34ee324 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -214,6 +214,7 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *, static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); +static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb); struct packet_skb_cb { union { @@ -260,6 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb) if (skb != orig_skb) goto drop; + packet_pick_tx_queue(dev, skb); txq = skb_get_tx_queue(dev, skb); local_bh_disable(); @@ -2747,8 +2749,6 @@ tpacket_error: goto tpacket_error; } - packet_pick_tx_queue(dev, skb); - skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); @@ -2931,8 +2931,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = sk->sk_priority; skb->mark = sockc.mark; - packet_pick_tx_queue(dev, skb); - if (po->has_vnet_hdr) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) -- cgit v1.2.3 From c4c4290c17bd099b7654d683f8ab6233b4f4a364 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Thu, 13 Jul 2017 13:12:18 -0400 Subject: net sched actions: rename act_get_notify() to tcf_get_notify() Make name consistent with other TC event notification routines, such as tcf_add_notify() and tcf_del_notify() Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- net/sched/act_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index aed6cf2e9fd8..f2e9ed34a963 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -835,7 +835,7 @@ out_nlmsg_trim: } static int -act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, +tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct list_head *actions, int event) { struct sk_buff *skb; @@ -1018,7 +1018,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = act_get_notify(net, portid, n, &actions, event); + ret = tcf_get_notify(net, portid, n, &actions, event); else { /* delete */ ret = tcf_del_notify(net, n, &actions, portid); if (ret) -- cgit v1.2.3 From a8f5cb9e79913a6918f87495d4969706f8817b8e Mon Sep 17 00:00:00 2001 From: Petr Kulhavy Date: Thu, 13 Jul 2017 19:40:57 +0200 Subject: smsc95xx: use ethtool_op_get_ts_info() This change enables the use of SW timestamping on Raspberry PI. smsc95xx uses the usbnet transmit function usbnet_start_xmit(), which implements software timestamping. However the SOF_TIMESTAMPING_TX_SOFTWARE capability was missing and only SOF_TIMESTAMPING_RX_SOFTWARE was announced. By using ethtool_op_get_ts_info() as get_ts_info() also the SOF_TIMESTAMPING_TX_SOFTWARE is announced. Signed-off-by: Petr Kulhavy Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 2dfca96a63b6..340c13484e5c 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -898,6 +898,7 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = { .set_wol = smsc95xx_ethtool_set_wol, .get_link_ksettings = smsc95xx_get_link_ksettings, .set_link_ksettings = smsc95xx_set_link_ksettings, + .get_ts_info = ethtool_op_get_ts_info, }; static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) -- cgit v1.2.3 From c98b0537f0d962b46c62c27b6c2d783257f7400f Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 13 Jul 2017 15:45:41 -0500 Subject: net: qcom/emac: fix double free of SGMII IRQ during shutdown If the interface is not up, then don't try to close it during a shutdown. This avoids possible double free of the IRQ, which can happen during a shutdown. Fixes: 03eb3eb4d4d5 ("net: qcom/emac: add shutdown function") Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 746d94e28470..60850bfa3d32 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -766,11 +766,13 @@ static void emac_shutdown(struct platform_device *pdev) struct emac_adapter *adpt = netdev_priv(netdev); struct emac_sgmii *sgmii = &adpt->phy; - /* Closing the SGMII turns off its interrupts */ - sgmii->close(adpt); + if (netdev->flags & IFF_UP) { + /* Closing the SGMII turns off its interrupts */ + sgmii->close(adpt); - /* Resetting the MAC turns off all DMA and its interrupts */ - emac_mac_reset(adpt); + /* Resetting the MAC turns off all DMA and its interrupts */ + emac_mac_reset(adpt); + } } static struct platform_driver emac_platform_driver = { -- cgit v1.2.3 From 45e0b4b3d532ea67bf90701e6162f31855ab3c98 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 14:07:00 +0200 Subject: isdn: divert: fix sprintf buffer overflow warning One string we pass into the cs->info buffer might be too long, as pointed out by gcc: drivers/isdn/divert/isdn_divert.c: In function 'll_callback': drivers/isdn/divert/isdn_divert.c:488:22: error: '%d' directive writing between 1 and 3 bytes into a region of size between 1 and 69 [-Werror=format-overflow=] sprintf(cs->info, "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/isdn/divert/isdn_divert.c:488:22: note: directive argument in the range [0, 255] drivers/isdn/divert/isdn_divert.c:488:4: note: 'sprintf' output 25 or more bytes (assuming 129) into a destination of size 90 This is unlikely to actually cause problems, so let's use snprintf as a simple workaround to shut up the warning and truncate the buffer instead. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/divert/isdn_divert.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/isdn/divert/isdn_divert.c b/drivers/isdn/divert/isdn_divert.c index 060d357f107f..6f423bc49d0d 100644 --- a/drivers/isdn/divert/isdn_divert.c +++ b/drivers/isdn/divert/isdn_divert.c @@ -485,18 +485,19 @@ static int isdn_divert_icall(isdn_ctrl *ic) cs->deflect_dest[0] = '\0'; retval = 4; /* only proceed */ } - sprintf(cs->info, "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n", - cs->akt_state, - cs->divert_id, - divert_if.drv_to_name(cs->ics.driver), - (ic->command == ISDN_STAT_ICALLW) ? "1" : "0", - cs->ics.parm.setup.phone, - cs->ics.parm.setup.eazmsn, - cs->ics.parm.setup.si1, - cs->ics.parm.setup.si2, - cs->ics.parm.setup.screen, - dv->rule.waittime, - cs->deflect_dest); + snprintf(cs->info, sizeof(cs->info), + "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n", + cs->akt_state, + cs->divert_id, + divert_if.drv_to_name(cs->ics.driver), + (ic->command == ISDN_STAT_ICALLW) ? "1" : "0", + cs->ics.parm.setup.phone, + cs->ics.parm.setup.eazmsn, + cs->ics.parm.setup.si1, + cs->ics.parm.setup.si2, + cs->ics.parm.setup.screen, + dv->rule.waittime, + cs->deflect_dest); if ((dv->rule.action == DEFLECT_REPORT) || (dv->rule.action == DEFLECT_REJECT)) { put_info_buffer(cs->info); -- cgit v1.2.3 From 73066f6c53fea76f95caadd70d802d24db747303 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 14:07:01 +0200 Subject: net: niu: fix format string overflow warning: We get a warning for the port_name string that might be longer than six characters if we had more than 10 ports: drivers/net/ethernet/sun/niu.c: In function 'niu_put_parent': drivers/net/ethernet/sun/niu.c:9563:21: error: '%d' directive writing between 1 and 3 bytes into a region of size 2 [-Werror=format-overflow=] sprintf(port_name, "port%d", port); ^~~~~~~~ drivers/net/ethernet/sun/niu.c:9563:21: note: directive argument in the range [0, 255] drivers/net/ethernet/sun/niu.c:9563:2: note: 'sprintf' output between 6 and 8 bytes into a destination of size 6 sprintf(port_name, "port%d", port); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/sun/niu.c: In function 'niu_pci_init_one': drivers/net/ethernet/sun/niu.c:9538:22: error: '%d' directive writing between 1 and 3 bytes into a region of size 2 [-Werror=format-overflow=] sprintf(port_name, "port%d", port); ^~~~~~~~ drivers/net/ethernet/sun/niu.c:9538:22: note: directive argument in the range [0, 255] drivers/net/ethernet/sun/niu.c:9538:3: note: 'sprintf' output between 6 and 8 bytes into a destination of size 6 While we know that the port number is small, there is no harm in making the format string two bytes longer to avoid the warning. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 46cb7f8955a2..4bb04aaf9650 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9532,7 +9532,7 @@ static struct niu_parent *niu_get_parent(struct niu *np, p = niu_new_parent(np, id, ptype); if (p) { - char port_name[6]; + char port_name[8]; int err; sprintf(port_name, "port%d", port); @@ -9553,7 +9553,7 @@ static void niu_put_parent(struct niu *np) { struct niu_parent *p = np->parent; u8 port = np->port; - char port_name[6]; + char port_name[8]; BUG_ON(!p || p->ports[port] != np); -- cgit v1.2.3 From be9cdf1b97023cf5548bd4f6920bb546272ebabc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 14:07:02 +0200 Subject: bnx2x: fix format overflow warning gcc notices that large queue numbers would overflow the queue name string: drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c: In function 'bnx2x_get_strings': drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c:3165:25: error: '%d' directive writing between 1 and 10 bytes into a region of size 5 [-Werror=format-overflow=] drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c:3165:25: note: directive argument in the range [0, 2147483647] drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c:3165:5: note: 'sprintf' output between 2 and 11 bytes into a destination of size 5 There is a hard limit in place that makes the number at most two digits, so the code is fine. This changes it to use snprintf() to truncate instead of overflowing, which shuts up that warning. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 21bc4bed6b26..1e33abde4a3e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3162,7 +3162,8 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) if (is_multi(bp)) { for_each_eth_queue(bp, i) { memset(queue_name, 0, sizeof(queue_name)); - sprintf(queue_name, "%d", i); + snprintf(queue_name, sizeof(queue_name), + "%d", i); for (j = 0; j < BNX2X_NUM_Q_STATS; j++) snprintf(buf + (k + j)*ETH_GSTRING_LEN, ETH_GSTRING_LEN, -- cgit v1.2.3 From c41626ce3113eb2d40b5aa1c4fc2b0cd2785367b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 14:07:03 +0200 Subject: net: thunder_bgx: avoid format string overflow warning gcc warns that the temporary buffer might be too small here: drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function 'bgx_probe': drivers/net/ethernet/cavium/thunder/thunder_bgx.c:1020:16: error: '%d' directive writing between 1 and 10 bytes into a region of size between 9 and 11 [-Werror=format-overflow=] sprintf(str, "BGX%d LMAC%d mode", bgx->bgx_id, lmacid); ^~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/cavium/thunder/thunder_bgx.c:1020:16: note: directive argument in the range [0, 2147483647] drivers/net/ethernet/cavium/thunder/thunder_bgx.c:1020:3: note: 'sprintf' output between 16 and 27 bytes into a destination of size 20 This probably can't happen, but it can't hurt to make it long enough for the theoretical limit. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index a0ca68ce3fbb..79112563a25a 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1008,7 +1008,7 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) { struct device *dev = &bgx->pdev->dev; struct lmac *lmac; - char str[20]; + char str[27]; if (!bgx->is_dlm && lmacid) return; -- cgit v1.2.3 From c7673e4dea9a338e00fa26cdd42d3697e8e22319 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 14:07:04 +0200 Subject: vmxnet3: avoid format strint overflow warning gcc-7 notices that "-event-%d" could be more than 11 characters long if we had larger 'vector' numbers: drivers/net/vmxnet3/vmxnet3_drv.c: In function 'vmxnet3_activate_dev': drivers/net/vmxnet3/vmxnet3_drv.c:2095:40: error: 'sprintf' may write a terminating nul past the end of the destination [-Werror=format-overflow=] sprintf(intr->event_msi_vector_name, "%s-event-%d", ^~~~~~~~~~~~~ drivers/net/vmxnet3/vmxnet3_drv.c:2095:3: note: 'sprintf' output between 9 and 33 bytes into a destination of size 32 The current code is safe, but making the string a little longer is harmless and lets gcc see that it's ok. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index ba1c9f93592b..9c51b8be0038 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -311,7 +311,7 @@ struct vmxnet3_intr { u8 num_intrs; /* # of intr vectors */ u8 event_intr_idx; /* idx of the intr vector for event */ u8 mod_levels[VMXNET3_LINUX_MAX_MSIX_VECT]; /* moderation level */ - char event_msi_vector_name[IFNAMSIZ+11]; + char event_msi_vector_name[IFNAMSIZ+17]; #ifdef CONFIG_PCI_MSI struct msix_entry msix_entries[VMXNET3_LINUX_MAX_MSIX_VECT]; #endif -- cgit v1.2.3 From 56c0da495a0b38f8ac0c0c0e3fcc750ea449daea Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 14:07:05 +0200 Subject: liquidio: fix possible eeprom format string overflow gcc reports that the temporary buffer for computing the string length may be too small here: drivers/net/ethernet/cavium/liquidio/lio_ethtool.c: In function 'lio_get_eeprom_len': /drivers/net/ethernet/cavium/liquidio/lio_ethtool.c:345:21: error: 'sprintf' may write a terminating nul past the end of the destination [-Werror=format-overflow=] len = sprintf(buf, "boardname:%s serialnum:%s maj:%lld min:%lld\n", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/cavium/liquidio/lio_ethtool.c:345:6: note: 'sprintf' output between 35 and 167 bytes into a destination of size 128 len = sprintf(buf, "boardname:%s serialnum:%s maj:%lld min:%lld\n", This extends it to 192 bytes, which is certainly enough. As far as I could tell, there are no other constraints that require a specific maximum size. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 28ecda3d3404..ebd353bc78ff 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -335,7 +335,7 @@ lio_ethtool_get_channels(struct net_device *dev, static int lio_get_eeprom_len(struct net_device *netdev) { - u8 buf[128]; + u8 buf[192]; struct lio *lio = GET_LIO(netdev); struct octeon_device *oct_dev = lio->oct_dev; struct octeon_board_info *board_info; -- cgit v1.2.3 From 10b3bf54406bb7f4e78da9bb2a485c5c986678ad Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 14 Jul 2017 22:07:33 +0800 Subject: sctp: fix an array overflow when all ext chunks are set Marcelo noticed an array overflow caused by commit c28445c3cb07 ("sctp: add reconf_enable in asoc ep and netns"), in which sctp would add SCTP_CID_RECONF into extensions when reconf_enable is set in sctp_make_init and sctp_make_init_ack. Then now when all ext chunks are set, 4 ext chunk ids can be put into extensions array while extensions array size is 3. It would cause a kernel panic because of this overflow. This patch is to fix it by defining extensions array size is 4 in both sctp_make_init and sctp_make_init_ack. Fixes: c28445c3cb07 ("sctp: add reconf_enable in asoc ep and netns") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 4e16b02ed832..6110447fe51d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_adaptation_ind_param_t aiparam; sctp_supported_ext_param_t ext_param; int num_ext = 0; - __u8 extensions[3]; + __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, *auth_hmacs = NULL; @@ -396,7 +396,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_adaptation_ind_param_t aiparam; sctp_supported_ext_param_t ext_param; int num_ext = 0; - __u8 extensions[3]; + __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, *auth_hmacs = NULL, *auth_random = NULL; -- cgit v1.2.3 From 83a5c5af26ef8005caebd50ce62383a02c5bae82 Mon Sep 17 00:00:00 2001 From: Abhishek Shah Date: Fri, 14 Jul 2017 00:34:07 +0530 Subject: net: ethernet: bgmac: Remove unnecessary 'return' from platform_bgmac_idm_write Return type for idm register write callback should be void as 'writel' API is used for write operation. However, there no need to have 'return' in this function. Signed-off-by: Abhishek Shah Reviewed-by: Oza Oza Reviewed-by: Ray Jui Reviewed-by: Scott Branden Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index 73aca97a96bc..1ca75dea1a77 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -50,7 +50,7 @@ static u32 platform_bgmac_idm_read(struct bgmac *bgmac, u16 offset) static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value) { - return writel(value, bgmac->plat.idm_base + offset); + writel(value, bgmac->plat.idm_base + offset); } static bool platform_bgmac_clk_enabled(struct bgmac *bgmac) -- cgit v1.2.3 From a163bdb02beb7df8b2768ce7c74a2b17803c96f9 Mon Sep 17 00:00:00 2001 From: Abhishek Shah Date: Fri, 14 Jul 2017 00:34:08 +0530 Subject: net: ethernet: bgmac: Make IDM register space optional IDM operations are usually one time ops and should be done in firmware itself. Driver is not supposed to touch IDM registers. However, for some SoCs', driver is performing IDM read/writes. So this patch masks IDM operations in case firmware is taking care of IDM operations. Signed-off-by: Abhishek Shah Reviewed-by: Oza Oza Reviewed-by: Ray Jui Reviewed-by: Scott Branden Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac-platform.c | 19 ++++--- drivers/net/ethernet/broadcom/bgmac.c | 70 +++++++++++++++----------- drivers/net/ethernet/broadcom/bgmac.h | 1 + 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index 1ca75dea1a77..d937083db9a4 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -55,6 +55,9 @@ static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value) static bool platform_bgmac_clk_enabled(struct bgmac *bgmac) { + if (!bgmac->plat.idm_base) + return true; + if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & BGMAC_CLK_EN) != BGMAC_CLK_EN) return false; if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) @@ -66,6 +69,9 @@ static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags) { u32 val; + if (!bgmac->plat.idm_base) + return; + /* The Reset Control register only contains a single bit to show if the * controller is currently in reset. Do a sanity check here, just in * case the bootloader happened to leave the device in reset. @@ -180,6 +186,7 @@ static int bgmac_probe(struct platform_device *pdev) bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4; bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP; bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP; + bgmac->feature_flags |= BGMAC_FEAT_IDM_MASK; bgmac->dev = &pdev->dev; bgmac->dma_dev = &pdev->dev; @@ -207,15 +214,13 @@ static int bgmac_probe(struct platform_device *pdev) return PTR_ERR(bgmac->plat.base); regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base"); - if (!regs) { - dev_err(&pdev->dev, "Unable to obtain idm resource\n"); - return -EINVAL; + if (regs) { + bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(bgmac->plat.idm_base)) + return PTR_ERR(bgmac->plat.idm_base); + bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK; } - bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(bgmac->plat.idm_base)) - return PTR_ERR(bgmac->plat.idm_base); - regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base"); if (regs) { bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev, diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index ba4d2e145bb9..48d672b204a4 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -622,9 +622,11 @@ static int bgmac_dma_alloc(struct bgmac *bgmac) BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base)); BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base)); - if (!(bgmac_idm_read(bgmac, BCMA_IOST) & BCMA_IOST_DMA64)) { - dev_err(bgmac->dev, "Core does not report 64-bit DMA\n"); - return -ENOTSUPP; + if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { + if (!(bgmac_idm_read(bgmac, BCMA_IOST) & BCMA_IOST_DMA64)) { + dev_err(bgmac->dev, "Core does not report 64-bit DMA\n"); + return -ENOTSUPP; + } } for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) { @@ -855,9 +857,11 @@ static void bgmac_mac_speed(struct bgmac *bgmac) static void bgmac_miiconfig(struct bgmac *bgmac) { if (bgmac->feature_flags & BGMAC_FEAT_FORCE_SPEED_2500) { - bgmac_idm_write(bgmac, BCMA_IOCTL, - bgmac_idm_read(bgmac, BCMA_IOCTL) | 0x40 | - BGMAC_BCMA_IOCTL_SW_CLKEN); + if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { + bgmac_idm_write(bgmac, BCMA_IOCTL, + bgmac_idm_read(bgmac, BCMA_IOCTL) | + 0x40 | BGMAC_BCMA_IOCTL_SW_CLKEN); + } bgmac->mac_speed = SPEED_2500; bgmac->mac_duplex = DUPLEX_FULL; bgmac_mac_speed(bgmac); @@ -874,11 +878,36 @@ static void bgmac_miiconfig(struct bgmac *bgmac) } } +static void bgmac_chip_reset_idm_config(struct bgmac *bgmac) +{ + u32 iost; + + iost = bgmac_idm_read(bgmac, BCMA_IOST); + if (bgmac->feature_flags & BGMAC_FEAT_IOST_ATTACHED) + iost &= ~BGMAC_BCMA_IOST_ATTACHED; + + /* 3GMAC: for BCM4707 & BCM47094, only do core reset at bgmac_probe() */ + if (!(bgmac->feature_flags & BGMAC_FEAT_NO_RESET)) { + u32 flags = 0; + + if (iost & BGMAC_BCMA_IOST_ATTACHED) { + flags = BGMAC_BCMA_IOCTL_SW_CLKEN; + if (!bgmac->has_robosw) + flags |= BGMAC_BCMA_IOCTL_SW_RESET; + } + bgmac_clk_enable(bgmac, flags); + } + + if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) + bgmac_idm_write(bgmac, BCMA_IOCTL, + bgmac_idm_read(bgmac, BCMA_IOCTL) & + ~BGMAC_BCMA_IOCTL_SW_RESET); +} + /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */ static void bgmac_chip_reset(struct bgmac *bgmac) { u32 cmdcfg_sr; - u32 iost; int i; if (bgmac_clk_enabled(bgmac)) { @@ -899,20 +928,8 @@ static void bgmac_chip_reset(struct bgmac *bgmac) /* TODO: Clear software multicast filter list */ } - iost = bgmac_idm_read(bgmac, BCMA_IOST); - if (bgmac->feature_flags & BGMAC_FEAT_IOST_ATTACHED) - iost &= ~BGMAC_BCMA_IOST_ATTACHED; - - /* 3GMAC: for BCM4707 & BCM47094, only do core reset at bgmac_probe() */ - if (!(bgmac->feature_flags & BGMAC_FEAT_NO_RESET)) { - u32 flags = 0; - if (iost & BGMAC_BCMA_IOST_ATTACHED) { - flags = BGMAC_BCMA_IOCTL_SW_CLKEN; - if (!bgmac->has_robosw) - flags |= BGMAC_BCMA_IOCTL_SW_RESET; - } - bgmac_clk_enable(bgmac, flags); - } + if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) + bgmac_chip_reset_idm_config(bgmac); /* Request Misc PLL for corerev > 2 */ if (bgmac->feature_flags & BGMAC_FEAT_MISC_PLL_REQ) { @@ -970,11 +987,6 @@ static void bgmac_chip_reset(struct bgmac *bgmac) BGMAC_CHIPCTL_7_IF_TYPE_RGMII); } - if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) - bgmac_idm_write(bgmac, BCMA_IOCTL, - bgmac_idm_read(bgmac, BCMA_IOCTL) & - ~BGMAC_BCMA_IOCTL_SW_RESET); - /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset * Specs don't say about using BGMAC_CMDCFG_SR, but in this routine * BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to @@ -1497,8 +1509,10 @@ int bgmac_enet_probe(struct bgmac *bgmac) bgmac_clk_enable(bgmac, 0); /* This seems to be fixing IRQ by assigning OOB #6 to the core */ - if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) - bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86); + if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { + if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) + bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86); + } bgmac_chip_reset(bgmac); diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index c1818766c501..443d57b10264 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -425,6 +425,7 @@ #define BGMAC_FEAT_CC4_IF_SW_TYPE BIT(17) #define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII BIT(18) #define BGMAC_FEAT_CC7_IF_TYPE_RGMII BIT(19) +#define BGMAC_FEAT_IDM_MASK BIT(20) struct bgmac_slot_info { union { -- cgit v1.2.3 From 10d79f7d3c8358f13dc23619838b0d65bcc6e2af Mon Sep 17 00:00:00 2001 From: Abhishek Shah Date: Fri, 14 Jul 2017 00:34:09 +0530 Subject: Documentation: devicetree: net: optional idm regs for bgmac Specifying IDM register space in DT is not mendatory for SoCs where firmware takes care of IDM operations. This patch updates BGMAC driver's DT binding documentation indicating the same. Signed-off-by: Abhishek Shah Reviewed-by: Ray Jui Reviewed-by: Oza Oza Reviewed-by: Scott Branden Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/brcm,amac.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/brcm,amac.txt b/Documentation/devicetree/bindings/net/brcm,amac.txt index 2fefa1a44afd..ad16c1f481f7 100644 --- a/Documentation/devicetree/bindings/net/brcm,amac.txt +++ b/Documentation/devicetree/bindings/net/brcm,amac.txt @@ -11,6 +11,7 @@ Required properties: - reg-names: Names of the registers. "amac_base": Address and length of the GMAC registers "idm_base": Address and length of the GMAC IDM registers + (required for NSP and Northstar2) "nicpm_base": Address and length of the NIC Port Manager registers (required for Northstar2) - interrupts: Interrupt number -- cgit v1.2.3 From dfcc16c9d5b9c9c38fe18a91da63ab5af05d96ca Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 10 Jul 2017 14:00:32 +0200 Subject: ioc3-eth: store pointer to net_device for priviate area Computing the alignment manually for going from priv to pub is probably not such a good idea, and in general the assumption that going from priv to pub is possible trivially could change, so rather than relying on that, we change things to just store a pointer to pub. This was sugested by DaveM in [1]. [1] http://www.spinics.net/lists/netdev/msg443992.html Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/ioc3-eth.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index b607936e1b3e..9c0488e0f08e 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -90,17 +90,13 @@ struct ioc3_private { spinlock_t ioc3_lock; struct mii_if_info mii; + struct net_device *dev; struct pci_dev *pdev; /* Members used by autonegotiation */ struct timer_list ioc3_timer; }; -static inline struct net_device *priv_netdev(struct ioc3_private *dev) -{ - return (void *)dev - ((sizeof(struct net_device) + 31) & ~31); -} - static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void ioc3_set_multicast_list(struct net_device *dev); static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -427,7 +423,7 @@ static void ioc3_get_eaddr_nic(struct ioc3_private *ip) nic[i] = nic_read_byte(ioc3); for (i = 2; i < 8; i++) - priv_netdev(ip)->dev_addr[i - 2] = nic[i]; + ip->dev->dev_addr[i - 2] = nic[i]; } /* @@ -439,7 +435,7 @@ static void ioc3_get_eaddr(struct ioc3_private *ip) { ioc3_get_eaddr_nic(ip); - printk("Ethernet address is %pM.\n", priv_netdev(ip)->dev_addr); + printk("Ethernet address is %pM.\n", ip->dev->dev_addr); } static void __ioc3_set_mac_address(struct net_device *dev) @@ -790,13 +786,12 @@ static void ioc3_timer(unsigned long data) */ static int ioc3_mii_init(struct ioc3_private *ip) { - struct net_device *dev = priv_netdev(ip); int i, found = 0, res = 0; int ioc3_phy_workaround = 1; u16 word; for (i = 0; i < 32; i++) { - word = ioc3_mdio_read(dev, i, MII_PHYSID1); + word = ioc3_mdio_read(ip->dev, i, MII_PHYSID1); if (word != 0xffff && word != 0x0000) { found = 1; @@ -1276,6 +1271,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) SET_NETDEV_DEV(dev, &pdev->dev); ip = netdev_priv(dev); + ip->dev = dev; dev->irq = pdev->irq; -- cgit v1.2.3 From e36fef66f4bbe8a51e8f8334a058a42c0f16e373 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 13 Jul 2017 23:01:27 -0400 Subject: mlx4_en: remove unnecessary returned value check The function __mlx4_zone_remove_one_entry always returns zero. So it is not necessary to check it. Cc: Joe Jin Cc: Junxiao Bi Signed-off-by: Zhu Yanjun Reviewed-by: Yuval Shaia Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/alloc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 249a4584401a..710d6c61150e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -283,7 +283,7 @@ int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc, } /* Should be called under a lock */ -static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) +static void __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) { struct mlx4_zone_allocator *zone_alloc = entry->allocator; @@ -315,8 +315,6 @@ static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) } zone_alloc->mask = mask; } - - return 0; } void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) @@ -457,7 +455,7 @@ struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid) { struct mlx4_zone_entry *zone; - int res; + int res = 0; spin_lock(&zones->lock); @@ -468,7 +466,7 @@ int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid) goto out; } - res = __mlx4_zone_remove_one_entry(zone); + __mlx4_zone_remove_one_entry(zone); out: spin_unlock(&zones->lock); -- cgit v1.2.3 From 254d900b801fc04aa524ff7bafe28fdd1dbf0ed6 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 14 Jul 2017 12:04:16 +0300 Subject: ipv4: ip_do_fragment: fix headroom tests Some time ago David Woodhouse reported skb_under_panic when we try to push ethernet header to fragmented ipv6 skbs. It was fixed for ipv6 by Florian Westphal in commit 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak") However similar problem still exist in ipv4. It does not trigger skb_under_panic due paranoid check in ip_finish_output2, however according to Alexey Kuznetsov current state is abnormal and ip_fragment should be fixed too. Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7eb252dcecee..50c74cd890bc 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -599,6 +599,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, hlen = iph->ihl * 4; mtu = mtu - hlen; /* Size of data space */ IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; + ll_rs = LL_RESERVED_SPACE(rt->dst.dev); /* When frag_list is given, use it. First, check its validity: * some transformers could create wrong frag_list or break existing @@ -614,14 +615,15 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (first_len - hlen > mtu || ((first_len - hlen) & 7) || ip_is_fragment(iph) || - skb_cloned(skb)) + skb_cloned(skb) || + skb_headroom(skb) < ll_rs) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || - skb_headroom(frag) < hlen) + skb_headroom(frag) < hlen + ll_rs) goto slow_path_clean; /* Partially cloned skb? */ @@ -711,8 +713,6 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ - ll_rs = LL_RESERVED_SPACE(rt->dst.dev); - /* * Fragment the datagram. */ -- cgit v1.2.3 From b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Jul 2017 18:32:45 +0200 Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}() If the length field of the iterator (|pos.p| or |err|) is past the end of the chunk, we shouldn't access it. This bug has been detected by KMSAN. For the following pair of system calls: socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3 sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 1 the tool has reported a use of uninitialized memory: ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0 CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 __sctp_rcv_init_lookup net/sctp/input.c:1074 __sctp_rcv_lookup_harder net/sctp/input.c:1233 __sctp_rcv_lookup net/sctp/input.c:1255 sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170 sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq kernel/softirq.c:328 __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181 local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31 rcu_read_unlock_bh ./include/linux/rcupdate.h:931 ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124 ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149 NF_HOOK_COND ./include/linux/netfilter.h:246 ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163 dst_output ./include/net/dst.h:486 NF_HOOK ./include/linux/netfilter.h:257 ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261 sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225 sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 RIP: 0033:0x401133 RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133 RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003 RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211 slab_alloc_node mm/slub.c:2743 __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x26b/0x840 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:933 sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a9519a06a23b..980807d7506f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -469,6 +469,8 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) @@ -479,6 +481,8 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) -- cgit v1.2.3 From 8b97ac5bda17cfaa257bcab6180af0f43a2e87e0 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 14 Jul 2017 12:42:49 -0700 Subject: openvswitch: Fix for force/commit action failures When there is an established connection in direction A->B, it is possible to receive a packet on port B which then executes ct(commit,force) without first performing ct() - ie, a lookup. In this case, we would expect that this packet can delete the existing entry so that we can commit a connection with direction B->A. However, currently we only perform a check in skb_nfct_cached() for whether OVS_CS_F_TRACKED is set and OVS_CS_F_INVALID is not set, ie that a lookup previously occurred. In the above scenario, a lookup has not occurred but we should still be able to statelessly look up the existing entry and potentially delete the entry if it is in the opposite direction. This patch extends the check to also hint that if the action has the force flag set, then we will lookup the existing entry so that the force check at the end of skb_nfct_cached has the ability to delete the connection. Fixes: dd41d330b03 ("openvswitch: Add force commit.") CC: Pravin Shelar CC: dev@openvswitch.org Signed-off-by: Joe Stringer Signed-off-by: Greg Rose Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 51 ++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 08679ebb3068..e3c4c6c3fef7 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -629,6 +629,34 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, return ct; } +static +struct nf_conn *ovs_ct_executed(struct net *net, + const struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb, + bool *ct_executed) +{ + struct nf_conn *ct = NULL; + + /* If no ct, check if we have evidence that an existing conntrack entry + * might be found for this skb. This happens when we lose a skb->_nfct + * due to an upcall, or if the direction is being forced. If the + * connection was not confirmed, it is not cached and needs to be run + * through conntrack again. + */ + *ct_executed = (key->ct_state & OVS_CS_F_TRACKED) && + !(key->ct_state & OVS_CS_F_INVALID) && + (key->ct_zone == info->zone.id); + + if (*ct_executed || (!key->ct_state && info->force)) { + ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, + !!(key->ct_state & + OVS_CS_F_NAT_MASK)); + } + + return ct; +} + /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ static bool skb_nfct_cached(struct net *net, const struct sw_flow_key *key, @@ -637,24 +665,17 @@ static bool skb_nfct_cached(struct net *net, { enum ip_conntrack_info ctinfo; struct nf_conn *ct; + bool ct_executed = true; ct = nf_ct_get(skb, &ctinfo); - /* If no ct, check if we have evidence that an existing conntrack entry - * might be found for this skb. This happens when we lose a skb->_nfct - * due to an upcall. If the connection was not confirmed, it is not - * cached and needs to be run through conntrack again. - */ - if (!ct && key->ct_state & OVS_CS_F_TRACKED && - !(key->ct_state & OVS_CS_F_INVALID) && - key->ct_zone == info->zone.id) { - ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, - !!(key->ct_state - & OVS_CS_F_NAT_MASK)); - if (ct) - nf_ct_get(skb, &ctinfo); - } if (!ct) + ct = ovs_ct_executed(net, key, info, skb, &ct_executed); + + if (ct) + nf_ct_get(skb, &ctinfo); + else return false; + if (!net_eq(net, read_pnet(&ct->ct_net))) return false; if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) @@ -679,7 +700,7 @@ static bool skb_nfct_cached(struct net *net, return false; } - return true; + return ct_executed; } #ifdef CONFIG_NF_NAT_NEEDED -- cgit v1.2.3 From 4aea287e90dd61a48268ff2994b56f9799441b62 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:21 -0400 Subject: tcp_bbr: cut pacing rate only if filled pipe In bbr_set_pacing_rate(), which decides whether to cut the pacing rate, there was some code that considered exiting STARTUP to be equivalent to the notion of filling the pipe (i.e., bbr_full_bw_reached()). Specifically, as the code was structured, exiting STARTUP and going into PROBE_RTT could cause us to cut the pacing rate down to something silly and low, based on whatever bandwidth samples we've had so far, when it's possible that all of them have been small app-limited bandwidth samples that are not representative of the bandwidth available in the path. (The code was correct at the time it was written, but the state machine changed without this spot being adjusted correspondingly.) Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index dbcc9352a48f..743e97511dc8 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -220,12 +220,11 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { - struct bbr *bbr = inet_csk_ca(sk); u64 rate = bw; rate = bbr_rate_bytes_per_sec(sk, rate, gain); rate = min_t(u64, rate, sk->sk_max_pacing_rate); - if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } -- cgit v1.2.3 From f19fd62dafaf1ed6cf615dba655b82fa9df59074 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:22 -0400 Subject: tcp_bbr: introduce bbr_bw_to_pacing_rate() helper Introduce a helper to convert a BBR bandwidth and gain factor to a pacing rate in bytes per second. This is a pure refactor, but is needed for two following fixes. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 743e97511dc8..29e23b851b97 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -211,6 +211,16 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) return rate >> BW_SCALE; } +/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ +static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + return rate; +} + /* Pace using current bw estimate and a gain factor. In order to help drive the * network toward lower queues while maintaining high utilization and low * latency, the average pacing rate aims to be slightly (~1%) lower than the @@ -220,10 +230,8 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { - u64 rate = bw; + u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); - rate = bbr_rate_bytes_per_sec(sk, rate, gain); - rate = min_t(u64, rate, sk->sk_max_pacing_rate); if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } -- cgit v1.2.3 From 79135b89b8af304456bd67916b80116ddf03d7b6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:23 -0400 Subject: tcp_bbr: introduce bbr_init_pacing_rate_from_rtt() helper Introduce a helper to initialize the BBR pacing rate unconditionally, based on the current cwnd and RTT estimate. This is a pure refactor, but is needed for two following fixes. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 29e23b851b97..3276140c2506 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -221,6 +221,23 @@ static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) return rate; } +/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +static void bbr_init_pacing_rate_from_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + u64 bw; + u32 rtt_us; + + if (tp->srtt_us) { /* any RTT sample yet? */ + rtt_us = max(tp->srtt_us >> 3, 1U); + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, rtt_us); + sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); +} + /* Pace using current bw estimate and a gain factor. In order to help drive the * network toward lower queues while maintaining high utilization and low * latency, the average pacing rate aims to be slightly (~1%) lower than the @@ -805,7 +822,6 @@ static void bbr_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw; bbr->prior_cwnd = 0; bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ @@ -821,11 +837,8 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ - /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ - bw = (u64)tp->snd_cwnd * BW_UNIT; - do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ - bbr_set_pacing_rate(sk, bw, bbr_high_gain); + bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; bbr->round_start = 0; -- cgit v1.2.3 From 1d3648eb5d1fe9ed3d095ed8fa19ad11ca4c8bc0 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:24 -0400 Subject: tcp_bbr: remove sk_pacing_rate=0 transient during init Fix a corner case noticed by Eric Dumazet, where BBR's setting sk->sk_pacing_rate to 0 during initialization could theoretically cause packets in the sending host to hang if there were packets "in flight" in the pacing infrastructure at the time the BBR congestion control state is initialized. This could occur if the pacing infrastructure happened to race with bbr_init() in a way such that the pacer read the 0 rather than the immediately following non-zero pacing rate. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Reported-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 3276140c2506..42e0017f2ebc 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -837,7 +837,6 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ - sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; -- cgit v1.2.3 From 32984565574da7ed3afa10647bb4020d7a9e6c93 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 14 Jul 2017 17:49:25 -0400 Subject: tcp_bbr: init pacing rate on first RTT sample Fixes the following behavior: for connections that had no RTT sample at the time of initializing congestion control, BBR was initializing the pacing rate to a high nominal rate (based an a guess of RTT=1ms, in case this is LAN traffic). Then BBR never adjusted the pacing rate downward upon obtaining an actual RTT sample, if the connection never filled the pipe (e.g. all sends were small app-limited writes()). This fix adjusts the pacing rate upon obtaining the first RTT sample. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 42e0017f2ebc..69ee877574d0 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -112,7 +112,8 @@ struct bbr { cwnd_gain:10, /* current gain for setting cwnd */ full_bw_cnt:3, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ - unused_b:6; + has_seen_rtt:1, /* have we seen an RTT sample yet? */ + unused_b:5; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ }; @@ -225,11 +226,13 @@ static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); u64 bw; u32 rtt_us; if (tp->srtt_us) { /* any RTT sample yet? */ rtt_us = max(tp->srtt_us >> 3, 1U); + bbr->has_seen_rtt = 1; } else { /* no RTT sample yet */ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ } @@ -247,8 +250,12 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); + if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) + bbr_init_pacing_rate_from_rtt(sk); if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } @@ -837,6 +844,7 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; -- cgit v1.2.3 From cd7b03e9cc94f249ae3b54cf5a41d4b9fb297e0b Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:42 +0530 Subject: isdn: hisax: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 13686 2064 4416 20166 4ec6 drivers/isdn/hisax/config.o File size After adding 'const': text data bss dec hex filename 15030 720 4416 20166 4ec6 drivers/isdn/hisax/config.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hisax/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index c7d68675b028..7108bdb8742e 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1909,7 +1909,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if) #ifdef CONFIG_PCI #include -static struct pci_device_id hisax_pci_tbl[] __used = { +static const struct pci_device_id hisax_pci_tbl[] __used = { #ifdef CONFIG_HISAX_FRITZPCI {PCI_VDEVICE(AVM, PCI_DEVICE_ID_AVM_A1) }, #endif -- cgit v1.2.3 From 3651003d4fd805c3a7761d1db3a7491d5547afb3 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:43 +0530 Subject: isdn: hisax: hfc4s8s_l1: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 10512 536 4 11052 2b2c drivers/isdn/hisax/hfc4s8s_l1.o File size After adding 'const': text data bss dec hex filename 10672 376 4 11052 2b2c drivers/isdn/hisax/hfc4s8s_l1.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hisax/hfc4s8s_l1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c index 90f051ce0259..9090cc1e1f29 100644 --- a/drivers/isdn/hisax/hfc4s8s_l1.c +++ b/drivers/isdn/hisax/hfc4s8s_l1.c @@ -86,7 +86,7 @@ typedef struct { char *device_name; } hfc4s8s_param; -static struct pci_device_id hfc4s8s_ids[] = { +static const struct pci_device_id hfc4s8s_ids[] = { {.vendor = PCI_VENDOR_ID_CCD, .device = PCI_DEVICE_ID_4S, .subvendor = 0x1397, -- cgit v1.2.3 From 6cfc3d86be2647686c8eebe41df69e5cd37dd8e6 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:44 +0530 Subject: isdn: hisax: hisax_fcpcipnp: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 5989 576 0 6565 19a5 isdn/hisax/hisax_fcpcipnp.o File size After adding 'const': text data bss dec hex filename 6085 480 0 6565 19a5 isdn/hisax/hisax_fcpcipnp.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hisax/hisax_fcpcipnp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.c b/drivers/isdn/hisax/hisax_fcpcipnp.c index 5a9f39ed1d5d..e4f7573ba9bf 100644 --- a/drivers/isdn/hisax/hisax_fcpcipnp.c +++ b/drivers/isdn/hisax/hisax_fcpcipnp.c @@ -52,7 +52,7 @@ module_param(debug, int, 0); MODULE_AUTHOR("Kai Germaschewski /Karsten Keil "); MODULE_DESCRIPTION("AVM Fritz!PCI/PnP ISDN driver"); -static struct pci_device_id fcpci_ids[] = { +static const struct pci_device_id fcpci_ids[] = { { .vendor = PCI_VENDOR_ID_AVM, .device = PCI_DEVICE_ID_AVM_A1, .subvendor = PCI_ANY_ID, -- cgit v1.2.3 From cf46d3518934bc61d9a01eb25aaaa364f325f876 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:45 +0530 Subject: isdn: eicon: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 6224 655 8 6887 1ae7 isdn/hardware/eicon/divasmain.o File size After adding 'const': text data bss dec hex filename 6608 271 8 6887 1ae7 isdn/hardware/eicon/divasmain.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/eicon/divasmain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index 8b7ad4f1ab01..b2023e08dcd2 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -110,7 +110,7 @@ typedef struct _diva_os_thread_dpc { /* This table should be sorted by PCI device ID */ -static struct pci_device_id divas_pci_tbl[] = { +static const struct pci_device_id divas_pci_tbl[] = { /* Diva Server BRI-2M PCI 0xE010 */ { PCI_VDEVICE(EICON, PCI_DEVICE_ID_EICON_MAESTRA), CARDTYPE_MAESTRA_PCI }, -- cgit v1.2.3 From 0d41668987f5c0df5ad5b5e1e3bf69aaf4b36c52 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:46 +0530 Subject: isdn: mISDN: netjet: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 10941 1776 16 12733 31bd isdn/hardware/mISDN/netjet.o File size After adding 'const': text data bss dec hex filename 11005 1712 16 12733 31bd isdn/hardware/mISDN/netjet.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/netjet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index afde4edef9ae..6a6d848bd18e 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -1137,7 +1137,7 @@ static void nj_remove(struct pci_dev *pdev) /* We cannot select cards with PCI_SUB... IDs, since here are cards with * SUB IDs set to PCI_ANY_ID, so we need to match all and reject * known other cards which not work with this driver - see probe function */ -static struct pci_device_id nj_pci_ids[] = { +static const struct pci_device_id nj_pci_ids[] = { { PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { } -- cgit v1.2.3 From e3b79fcff622fb5537d40db30fc9a2632d6a8456 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:47 +0530 Subject: isdn: mISDN: hfcmulti: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 63450 1536 1492 66478 103ae isdn/hardware/mISDN/hfcmulti.o File size After adding 'const': text data bss dec hex filename 64698 288 1492 66478 103ae isdn/hardware/mISDN/hfcmulti.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index aea0c9616ea5..3cf07b8ced1c 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -5348,7 +5348,7 @@ static const struct hm_map hfcm_map[] = { #undef H #define H(x) ((unsigned long)&hfcm_map[x]) -static struct pci_device_id hfmultipci_ids[] = { +static const struct pci_device_id hfmultipci_ids[] = { /* Cards with HFC-4S Chip */ { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, -- cgit v1.2.3 From e8336ed0b35261603871a6c5e829f69051530505 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:48 +0530 Subject: isdn: mISDN: w6692: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 13959 4080 24 18063 468f isdn/hardware/mISDN/w6692.o File size After adding 'const': text data bss dec hex filename 14087 3952 24 18063 468f isdn/hardware/mISDN/w6692.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/w6692.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index 3052c836b89f..d80072fef434 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -1398,7 +1398,7 @@ w6692_remove_pci(struct pci_dev *pdev) pr_notice("%s: drvdata already removed\n", __func__); } -static struct pci_device_id w6692_ids[] = { +static const struct pci_device_id w6692_ids[] = { { PCI_VENDOR_ID_DYNALINK, PCI_DEVICE_ID_DYNALINK_IS64PH, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (ulong)&w6692_map[0]}, { PCI_VENDOR_ID_WINBOND2, PCI_DEVICE_ID_WINBOND2_6692, -- cgit v1.2.3 From 1d9c8fa013cdea5d864f5332d0e203312de3a93d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:49 +0530 Subject: isdn: mISDN: avmfritz: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 9963 1936 16 11915 2e8b isdn/hardware/mISDN/avmfritz.o File size After adding 'const': text data bss dec hex filename 10091 1808 16 11915 2e8b isdn/hardware/mISDN/avmfritz.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/avmfritz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c index e3fa1cd64470..dce6632daae1 100644 --- a/drivers/isdn/hardware/mISDN/avmfritz.c +++ b/drivers/isdn/hardware/mISDN/avmfritz.c @@ -1142,7 +1142,7 @@ fritz_remove_pci(struct pci_dev *pdev) pr_info("%s: drvdata already removed\n", __func__); } -static struct pci_device_id fcpci_ids[] = { +static const struct pci_device_id fcpci_ids[] = { { PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (unsigned long) "Fritz!Card PCI"}, { PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_A1_V2, PCI_ANY_ID, PCI_ANY_ID, -- cgit v1.2.3 From ed038e7e68ca8fc92b5cb877cd44d930ef98c52a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:50 +0530 Subject: isdn: mISDN: hfcpci: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 21656 1024 96 22776 58f8 isdn/hardware/mISDN/hfcpci.o File size After adding 'const': text data bss dec hex filename 22424 256 96 22776 58f8 isdn/hardware/mISDN/hfcpci.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 5dc246d71c16..d2e401a8090e 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -2161,7 +2161,7 @@ static const struct _hfc_map hfc_map[] = {}, }; -static struct pci_device_id hfc_ids[] = +static const struct pci_device_id hfc_ids[] = { { PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_2BD0), (unsigned long) &hfc_map[0] }, -- cgit v1.2.3 From 65f96417363148989bc5b7735b2fc4e824c9d2b9 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 15 Jul 2017 09:55:51 +0530 Subject: isdn: avm: c4: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 11803 544 1 12348 303c isdn/hardware/avm/c4.o File size After adding 'const': text data bss dec hex filename 11931 416 1 12348 303c isdn/hardware/avm/c4.o Signed-off-by: Arvind Yadav Signed-off-by: David S. Miller --- drivers/isdn/hardware/avm/c4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 40c7e2cf423b..034cabac699d 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -42,7 +42,7 @@ static char *revision = "$Revision: 1.1.2.2 $"; static bool suppress_pollack; -static struct pci_device_id c4_pci_tbl[] = { +static const struct pci_device_id c4_pci_tbl[] = { { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C4, 0, 0, (unsigned long)4 }, { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C2, 0, 0, (unsigned long)2 }, { } /* Terminating entry */ -- cgit v1.2.3 From ea6c3077678f969577e0f75aecda9478c3840912 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 15 Jul 2017 10:24:47 -0700 Subject: dt-bindings: net: Remove duplicate NSP Ethernet MAC binding document Commit 07d4510f5251 ("dt-bindings: net: bgmac: add bindings documentation for bgmac") added both brcm,amac-nsp.txt and brcm,bgmac-nsp.txt. The former is actually the one that got updated and is in use by the bgmac driver while the latter is duplicating the former and is not used nor updated. Fixes: 07d4510f5251 ("dt-bindings: net: bgmac: add bindings documentation for bgmac") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/brcm,bgmac-nsp.txt | 24 ---------------------- 1 file changed, 24 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt diff --git a/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt b/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt deleted file mode 100644 index 022946caa7e2..000000000000 --- a/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt +++ /dev/null @@ -1,24 +0,0 @@ -Broadcom GMAC Ethernet Controller Device Tree Bindings -------------------------------------------------------------- - -Required properties: - - compatible: "brcm,bgmac-nsp" - - reg: Address and length of the GMAC registers, - Address and length of the GMAC IDM registers - - reg-names: Names of the registers. Must have both "gmac_base" and - "idm_base" - - interrupts: Interrupt number - -Optional properties: -- mac-address: See ethernet.txt file in the same directory - -Examples: - -gmac0: ethernet@18022000 { - compatible = "brcm,bgmac-nsp"; - reg = <0x18022000 0x1000>, - <0x18110000 0x1000>; - reg-names = "gmac_base", "idm_base"; - interrupts = ; - status = "disabled"; -}; -- cgit v1.2.3 From 876dbadd53a7102e2a84afc84ea2bd3ee6dc5636 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Fri, 14 Jul 2017 16:12:09 -0700 Subject: net: bcmgenet: Fix unmapping of fragments in bcmgenet_xmit() In case we fail to map a single fragment, we would be leaving the transmit ring populated with stale entries. This commit introduces the helper function bcmgenet_put_txcb() which takes care of rewinding the per-ring write pointer back to where we left. It also consolidates the functionality of bcmgenet_xmit_single() and bcmgenet_xmit_frag() into the bcmgenet_xmit() function to make the unmapping of control blocks cleaner. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Suggested-by: Florian Fainelli Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 191 +++++++++++-------------- 1 file changed, 85 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index daca1c9d254b..20021525f795 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1202,6 +1202,23 @@ static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv, return tx_cb_ptr; } +static struct enet_cb *bcmgenet_put_txcb(struct bcmgenet_priv *priv, + struct bcmgenet_tx_ring *ring) +{ + struct enet_cb *tx_cb_ptr; + + tx_cb_ptr = ring->cbs; + tx_cb_ptr += ring->write_ptr - ring->cb_ptr; + + /* Rewinding local write pointer */ + if (ring->write_ptr == ring->cb_ptr) + ring->write_ptr = ring->end_ptr; + else + ring->write_ptr--; + + return tx_cb_ptr; +} + /* Simple helper to free a control block's resources */ static void bcmgenet_free_cb(struct enet_cb *cb) { @@ -1380,95 +1397,6 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) bcmgenet_tx_reclaim(dev, &priv->tx_rings[DESC_INDEX]); } -/* Transmits a single SKB (either head of a fragment or a single SKB) - * caller must hold priv->lock - */ -static int bcmgenet_xmit_single(struct net_device *dev, - struct sk_buff *skb, - u16 dma_desc_flags, - struct bcmgenet_tx_ring *ring) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - struct device *kdev = &priv->pdev->dev; - struct enet_cb *tx_cb_ptr; - unsigned int skb_len; - dma_addr_t mapping; - u32 length_status; - int ret; - - tx_cb_ptr = bcmgenet_get_txcb(priv, ring); - - if (unlikely(!tx_cb_ptr)) - BUG(); - - tx_cb_ptr->skb = skb; - - skb_len = skb_headlen(skb); - - mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); - ret = dma_mapping_error(kdev, mapping); - if (ret) { - priv->mib.tx_dma_failed++; - netif_err(priv, tx_err, dev, "Tx DMA map failed\n"); - dev_kfree_skb(skb); - return ret; - } - - dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping); - dma_unmap_len_set(tx_cb_ptr, dma_len, skb_len); - length_status = (skb_len << DMA_BUFLENGTH_SHIFT) | dma_desc_flags | - (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT) | - DMA_TX_APPEND_CRC; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - length_status |= DMA_TX_DO_CSUM; - - dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, length_status); - - return 0; -} - -/* Transmit a SKB fragment */ -static int bcmgenet_xmit_frag(struct net_device *dev, - skb_frag_t *frag, - u16 dma_desc_flags, - struct bcmgenet_tx_ring *ring) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - struct device *kdev = &priv->pdev->dev; - struct enet_cb *tx_cb_ptr; - unsigned int frag_size; - dma_addr_t mapping; - int ret; - - tx_cb_ptr = bcmgenet_get_txcb(priv, ring); - - if (unlikely(!tx_cb_ptr)) - BUG(); - - tx_cb_ptr->skb = NULL; - - frag_size = skb_frag_size(frag); - - mapping = skb_frag_dma_map(kdev, frag, 0, frag_size, DMA_TO_DEVICE); - ret = dma_mapping_error(kdev, mapping); - if (ret) { - priv->mib.tx_dma_failed++; - netif_err(priv, tx_err, dev, "%s: Tx DMA map failed\n", - __func__); - return ret; - } - - dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping); - dma_unmap_len_set(tx_cb_ptr, dma_len, frag_size); - - dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, - (frag_size << DMA_BUFLENGTH_SHIFT) | dma_desc_flags | - (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT)); - - return 0; -} - /* Reallocate the SKB to put enough headroom in front of it and insert * the transmit checksum offsets in the descriptors */ @@ -1535,11 +1463,16 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; struct bcmgenet_tx_ring *ring = NULL; + struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; unsigned long flags = 0; int nr_frags, index; - u16 dma_desc_flags; + dma_addr_t mapping; + unsigned int size; + skb_frag_t *frag; + u32 len_stat; int ret; int i; @@ -1592,27 +1525,49 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) } } - dma_desc_flags = DMA_SOP; - if (nr_frags == 0) - dma_desc_flags |= DMA_EOP; + for (i = 0; i <= nr_frags; i++) { + tx_cb_ptr = bcmgenet_get_txcb(priv, ring); - /* Transmit single SKB or head of fragment list */ - ret = bcmgenet_xmit_single(dev, skb, dma_desc_flags, ring); - if (ret) { - ret = NETDEV_TX_OK; - goto out; - } + if (unlikely(!tx_cb_ptr)) + BUG(); - /* xmit fragment */ - for (i = 0; i < nr_frags; i++) { - ret = bcmgenet_xmit_frag(dev, - &skb_shinfo(skb)->frags[i], - (i == nr_frags - 1) ? DMA_EOP : 0, - ring); + if (!i) { + /* Transmit single SKB or head of fragment list */ + tx_cb_ptr->skb = skb; + size = skb_headlen(skb); + mapping = dma_map_single(kdev, skb->data, size, + DMA_TO_DEVICE); + } else { + /* xmit fragment */ + tx_cb_ptr->skb = NULL; + frag = &skb_shinfo(skb)->frags[i - 1]; + size = skb_frag_size(frag); + mapping = skb_frag_dma_map(kdev, frag, 0, size, + DMA_TO_DEVICE); + } + + ret = dma_mapping_error(kdev, mapping); if (ret) { + priv->mib.tx_dma_failed++; + netif_err(priv, tx_err, dev, "Tx DMA map failed\n"); ret = NETDEV_TX_OK; - goto out; + goto out_unmap_frags; } + dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping); + dma_unmap_len_set(tx_cb_ptr, dma_len, size); + + len_stat = (size << DMA_BUFLENGTH_SHIFT) | + (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT); + + if (!i) { + len_stat |= DMA_TX_APPEND_CRC | DMA_SOP; + if (skb->ip_summed == CHECKSUM_PARTIAL) + len_stat |= DMA_TX_DO_CSUM; + } + if (i == nr_frags) + len_stat |= DMA_EOP; + + dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, len_stat); } skb_tx_timestamp(skb); @@ -1635,6 +1590,30 @@ out: spin_unlock_irqrestore(&ring->lock, flags); return ret; + +out_unmap_frags: + /* Back up for failed control block mapping */ + bcmgenet_put_txcb(priv, ring); + + /* Unmap successfully mapped control blocks */ + while (i-- > 0) { + tx_cb_ptr = bcmgenet_put_txcb(priv, ring); + if (tx_cb_ptr->skb) + dma_unmap_single(kdev, + dma_unmap_addr(tx_cb_ptr, dma_addr), + dma_unmap_len(tx_cb_ptr, dma_len), + DMA_TO_DEVICE); + else + dma_unmap_page(kdev, + dma_unmap_addr(tx_cb_ptr, dma_addr), + dma_unmap_len(tx_cb_ptr, dma_len), + DMA_TO_DEVICE); + dma_unmap_addr_set(tx_cb_ptr, dma_addr, 0); + tx_cb_ptr->skb = NULL; + } + + dev_kfree_skb(skb); + goto out; } static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, -- cgit v1.2.3 From f48bed16a756f5bc0244acd581f61968f7d7c2a4 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Fri, 14 Jul 2017 16:12:10 -0700 Subject: net: bcmgenet: Free skb after last Tx frag Since the skb is attached to the first control block of a fragmented skb it is possible that the skb could be freed when reclaiming that control block before all fragments of the skb have been consumed by the hardware and unmapped. This commit introduces first_cb and last_cb pointers to the skb control block used by the driver to keep track of which transmit control blocks within a transmit ring are the first and last ones associated with the skb. It then splits the bcmgenet_free_cb() function into transmit (bcmgenet_free_tx_cb) and receive (bcmgenet_free_rx_cb) versions that can handle the unmapping of dma mapped memory and cleaning up the corresponding control block structure so that the skb is only freed after the last associated transmit control block is reclaimed. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 142 ++++++++++++++----------- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 + 2 files changed, 84 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 20021525f795..7b0b399aaedd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1219,14 +1219,6 @@ static struct enet_cb *bcmgenet_put_txcb(struct bcmgenet_priv *priv, return tx_cb_ptr; } -/* Simple helper to free a control block's resources */ -static void bcmgenet_free_cb(struct enet_cb *cb) -{ - dev_kfree_skb_any(cb->skb); - cb->skb = NULL; - dma_unmap_addr_set(cb, dma_addr, 0); -} - static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring) { bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_RXDMA_DONE, @@ -1277,18 +1269,72 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_tx_ring *ring) INTRL2_CPU_MASK_SET); } +/* Simple helper to free a transmit control block's resources + * Returns an skb when the last transmit control block associated with the + * skb is freed. The skb should be freed by the caller if necessary. + */ +static struct sk_buff *bcmgenet_free_tx_cb(struct device *dev, + struct enet_cb *cb) +{ + struct sk_buff *skb; + + skb = cb->skb; + + if (skb) { + cb->skb = NULL; + if (cb == GENET_CB(skb)->first_cb) + dma_unmap_single(dev, dma_unmap_addr(cb, dma_addr), + dma_unmap_len(cb, dma_len), + DMA_TO_DEVICE); + else + dma_unmap_page(dev, dma_unmap_addr(cb, dma_addr), + dma_unmap_len(cb, dma_len), + DMA_TO_DEVICE); + dma_unmap_addr_set(cb, dma_addr, 0); + + if (cb == GENET_CB(skb)->last_cb) + return skb; + + } else if (dma_unmap_addr(cb, dma_addr)) { + dma_unmap_page(dev, + dma_unmap_addr(cb, dma_addr), + dma_unmap_len(cb, dma_len), + DMA_TO_DEVICE); + dma_unmap_addr_set(cb, dma_addr, 0); + } + + return 0; +} + +/* Simple helper to free a receive control block's resources */ +static struct sk_buff *bcmgenet_free_rx_cb(struct device *dev, + struct enet_cb *cb) +{ + struct sk_buff *skb; + + skb = cb->skb; + cb->skb = NULL; + + if (dma_unmap_addr(cb, dma_addr)) { + dma_unmap_single(dev, dma_unmap_addr(cb, dma_addr), + dma_unmap_len(cb, dma_len), DMA_FROM_DEVICE); + dma_unmap_addr_set(cb, dma_addr, 0); + } + + return skb; +} + /* Unlocked version of the reclaim routine */ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct device *kdev = &priv->pdev->dev; - struct enet_cb *tx_cb_ptr; - unsigned int pkts_compl = 0; + unsigned int txbds_processed = 0; unsigned int bytes_compl = 0; - unsigned int c_index; + unsigned int pkts_compl = 0; unsigned int txbds_ready; - unsigned int txbds_processed = 0; + unsigned int c_index; + struct sk_buff *skb; /* Clear status before servicing to reduce spurious interrupts */ if (ring->index == DESC_INDEX) @@ -1309,21 +1355,12 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, /* Reclaim transmitted buffers */ while (txbds_processed < txbds_ready) { - tx_cb_ptr = &priv->tx_cbs[ring->clean_ptr]; - if (tx_cb_ptr->skb) { + skb = bcmgenet_free_tx_cb(&priv->pdev->dev, + &priv->tx_cbs[ring->clean_ptr]); + if (skb) { pkts_compl++; - bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent; - dma_unmap_single(kdev, - dma_unmap_addr(tx_cb_ptr, dma_addr), - dma_unmap_len(tx_cb_ptr, dma_len), - DMA_TO_DEVICE); - bcmgenet_free_cb(tx_cb_ptr); - } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) { - dma_unmap_page(kdev, - dma_unmap_addr(tx_cb_ptr, dma_addr), - dma_unmap_len(tx_cb_ptr, dma_len), - DMA_TO_DEVICE); - dma_unmap_addr_set(tx_cb_ptr, dma_addr, 0); + bytes_compl += GENET_CB(skb)->bytes_sent; + dev_kfree_skb_any(skb); } txbds_processed++; @@ -1533,13 +1570,12 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) if (!i) { /* Transmit single SKB or head of fragment list */ - tx_cb_ptr->skb = skb; + GENET_CB(skb)->first_cb = tx_cb_ptr; size = skb_headlen(skb); mapping = dma_map_single(kdev, skb->data, size, DMA_TO_DEVICE); } else { /* xmit fragment */ - tx_cb_ptr->skb = NULL; frag = &skb_shinfo(skb)->frags[i - 1]; size = skb_frag_size(frag); mapping = skb_frag_dma_map(kdev, frag, 0, size, @@ -1556,6 +1592,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping); dma_unmap_len_set(tx_cb_ptr, dma_len, size); + tx_cb_ptr->skb = skb; + len_stat = (size << DMA_BUFLENGTH_SHIFT) | (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT); @@ -1570,6 +1608,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, len_stat); } + GENET_CB(skb)->last_cb = tx_cb_ptr; skb_tx_timestamp(skb); /* Decrement total BD count and advance our write pointer */ @@ -1598,18 +1637,7 @@ out_unmap_frags: /* Unmap successfully mapped control blocks */ while (i-- > 0) { tx_cb_ptr = bcmgenet_put_txcb(priv, ring); - if (tx_cb_ptr->skb) - dma_unmap_single(kdev, - dma_unmap_addr(tx_cb_ptr, dma_addr), - dma_unmap_len(tx_cb_ptr, dma_len), - DMA_TO_DEVICE); - else - dma_unmap_page(kdev, - dma_unmap_addr(tx_cb_ptr, dma_addr), - dma_unmap_len(tx_cb_ptr, dma_len), - DMA_TO_DEVICE); - dma_unmap_addr_set(tx_cb_ptr, dma_addr, 0); - tx_cb_ptr->skb = NULL; + bcmgenet_free_tx_cb(kdev, tx_cb_ptr); } dev_kfree_skb(skb); @@ -1645,14 +1673,12 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, } /* Grab the current Rx skb from the ring and DMA-unmap it */ - rx_skb = cb->skb; - if (likely(rx_skb)) - dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr), - priv->rx_buf_len, DMA_FROM_DEVICE); + rx_skb = bcmgenet_free_rx_cb(kdev, cb); /* Put the new Rx skb on the ring */ cb->skb = skb; dma_unmap_addr_set(cb, dma_addr, mapping); + dma_unmap_len_set(cb, dma_len, priv->rx_buf_len); dmadesc_set_addr(priv, cb->bd_addr, mapping); /* Return the current Rx skb to caller */ @@ -1859,22 +1885,16 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv, static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv) { - struct device *kdev = &priv->pdev->dev; + struct sk_buff *skb; struct enet_cb *cb; int i; for (i = 0; i < priv->num_rx_bds; i++) { cb = &priv->rx_cbs[i]; - if (dma_unmap_addr(cb, dma_addr)) { - dma_unmap_single(kdev, - dma_unmap_addr(cb, dma_addr), - priv->rx_buf_len, DMA_FROM_DEVICE); - dma_unmap_addr_set(cb, dma_addr, 0); - } - - if (cb->skb) - bcmgenet_free_cb(cb); + skb = bcmgenet_free_rx_cb(&priv->pdev->dev, cb); + if (skb) + dev_kfree_skb_any(skb); } } @@ -2458,8 +2478,10 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv) static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) { - int i; struct netdev_queue *txq; + struct sk_buff *skb; + struct enet_cb *cb; + int i; bcmgenet_fini_rx_napi(priv); bcmgenet_fini_tx_napi(priv); @@ -2468,10 +2490,10 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) bcmgenet_dma_teardown(priv); for (i = 0; i < priv->num_tx_bds; i++) { - if (priv->tx_cbs[i].skb != NULL) { - dev_kfree_skb(priv->tx_cbs[i].skb); - priv->tx_cbs[i].skb = NULL; - } + cb = priv->tx_cbs + i; + skb = bcmgenet_free_tx_cb(&priv->pdev->dev, cb); + if (skb) + dev_kfree_skb(skb); } for (i = 0; i < priv->hw_params->tx_queues; i++) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index efd07020b89f..b9344de669f8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -544,6 +544,8 @@ struct bcmgenet_hw_params { }; struct bcmgenet_skb_cb { + struct enet_cb *first_cb; /* First control block of SKB */ + struct enet_cb *last_cb; /* Last control block of SKB */ unsigned int bytes_sent; /* bytes on the wire (no TSB) */ }; -- cgit v1.2.3 From f55ce7b024090a51382ccab2730b96e2f7b4e9cf Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 7 Jun 2017 15:50:38 +0200 Subject: netfilter: nfnetlink: Improve input length sanitization in nfnetlink_rcv Verify that the length of the socket buffer is sufficient to cover the nlmsghdr structure before accessing the nlh->nlmsg_len field for further input sanitization. If the client only supplies 1-3 bytes of data in sk_buff, then nlh->nlmsg_len remains partially uninitialized and contains leftover memory from the corresponding kernel allocation. Operating on such data may result in indeterminate evaluation of the nlmsg_len < NLMSG_HDRLEN expression. The bug was discovered by a runtime instrumentation designed to detect use of uninitialized memory in the kernel. The patch prevents this and other similar tools (e.g. KMSAN) from flagging this behavior in the future. Signed-off-by: Mateusz Jurczyk Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 92b05e188fd1..733d3e4a30d8 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -472,8 +472,7 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) if (msglen > skb->len) msglen = skb->len; - if (nlh->nlmsg_len < NLMSG_HDRLEN || - skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy, @@ -500,7 +499,8 @@ static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - if (nlh->nlmsg_len < NLMSG_HDRLEN || + if (skb->len < NLMSG_HDRLEN || + nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return; -- cgit v1.2.3 From cf56c2f892a8a1870a8358114ad896772da7543a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jul 2017 23:17:44 +0200 Subject: netfilter: remove old pre-netns era hook api no more users in the tree, remove this. The old api is racy wrt. module removal, all users have been converted to the netns-aware api. The old api pretended we still have global hooks but that has not been true for a long time. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 9 --- net/netfilter/core.c | 143 ---------------------------------------------- 2 files changed, 152 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index a4b97be30b28..22f081065d49 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -61,8 +61,6 @@ typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; - /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; @@ -160,13 +158,6 @@ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); -int nf_register_hook(struct nf_hook_ops *reg); -void nf_unregister_hook(struct nf_hook_ops *reg); -int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); -void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); -int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); -void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); - /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ int nf_register_sockopt(struct nf_sockopt_ops *reg); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 552d606e57ca..368610dbc3c0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -227,114 +227,6 @@ void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, } EXPORT_SYMBOL(nf_unregister_net_hooks); -static LIST_HEAD(nf_hook_list); - -static int _nf_register_hook(struct nf_hook_ops *reg) -{ - struct net *net, *last; - int ret; - - for_each_net(net) { - ret = nf_register_net_hook(net, reg); - if (ret && ret != -ENOENT) - goto rollback; - } - list_add_tail(®->list, &nf_hook_list); - - return 0; -rollback: - last = net; - for_each_net(net) { - if (net == last) - break; - nf_unregister_net_hook(net, reg); - } - return ret; -} - -int nf_register_hook(struct nf_hook_ops *reg) -{ - int ret; - - rtnl_lock(); - ret = _nf_register_hook(reg); - rtnl_unlock(); - - return ret; -} -EXPORT_SYMBOL(nf_register_hook); - -static void _nf_unregister_hook(struct nf_hook_ops *reg) -{ - struct net *net; - - list_del(®->list); - for_each_net(net) - nf_unregister_net_hook(net, reg); -} - -void nf_unregister_hook(struct nf_hook_ops *reg) -{ - rtnl_lock(); - _nf_unregister_hook(reg); - rtnl_unlock(); -} -EXPORT_SYMBOL(nf_unregister_hook); - -int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - unsigned int i; - int err = 0; - - for (i = 0; i < n; i++) { - err = nf_register_hook(®[i]); - if (err) - goto err; - } - return err; - -err: - if (i > 0) - nf_unregister_hooks(reg, i); - return err; -} -EXPORT_SYMBOL(nf_register_hooks); - -/* Caller MUST take rtnl_lock() */ -int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - unsigned int i; - int err = 0; - - for (i = 0; i < n; i++) { - err = _nf_register_hook(®[i]); - if (err) - goto err; - } - return err; - -err: - if (i > 0) - _nf_unregister_hooks(reg, i); - return err; -} -EXPORT_SYMBOL(_nf_register_hooks); - -void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - while (n-- > 0) - nf_unregister_hook(®[n]); -} -EXPORT_SYMBOL(nf_unregister_hooks); - -/* Caller MUST take rtnl_lock */ -void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - while (n-- > 0) - _nf_unregister_hook(®[n]); -} -EXPORT_SYMBOL(_nf_unregister_hooks); - /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, @@ -450,37 +342,6 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static int nf_register_hook_list(struct net *net) -{ - struct nf_hook_ops *elem; - int ret; - - rtnl_lock(); - list_for_each_entry(elem, &nf_hook_list, list) { - ret = nf_register_net_hook(net, elem); - if (ret && ret != -ENOENT) - goto out_undo; - } - rtnl_unlock(); - return 0; - -out_undo: - list_for_each_entry_continue_reverse(elem, &nf_hook_list, list) - nf_unregister_net_hook(net, elem); - rtnl_unlock(); - return ret; -} - -static void nf_unregister_hook_list(struct net *net) -{ - struct nf_hook_ops *elem; - - rtnl_lock(); - list_for_each_entry(elem, &nf_hook_list, list) - nf_unregister_net_hook(net, elem); - rtnl_unlock(); -} - static int __net_init netfilter_net_init(struct net *net) { int i, h, ret; @@ -500,16 +361,12 @@ static int __net_init netfilter_net_init(struct net *net) return -ENOMEM; } #endif - ret = nf_register_hook_list(net); - if (ret) - remove_proc_entry("netfilter", net->proc_net); return ret; } static void __net_exit netfilter_net_exit(struct net *net) { - nf_unregister_hook_list(net); remove_proc_entry("netfilter", net->proc_net); } -- cgit v1.2.3 From 97772bcd56efa21d9d8976db6f205574ea602f51 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jul 2017 13:07:17 +0200 Subject: netfilter: nat: fix src map lookup When doing initial conversion to rhashtable I replaced the bucket walk with a single rhashtable_lookup_fast(). When moving to rhlist I failed to properly walk the list of identical tuples, but that is what is needed for this to work correctly. The table contains the original tuples, so the reply tuples are all distinct. We currently decide that mapping is (not) in range only based on the first entry, but in case its not we need to try the reply tuple of the next entry until we either find an in-range mapping or we checked all the entries. This bug makes nat core attempt collision resolution while it might be able to use the mapping as-is. Fixes: 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable") Reported-by: Jaco Kroon Tested-by: Jaco Kroon Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 832c5a08d9a5..eb541786ccb7 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -222,20 +222,21 @@ find_appropriate_src(struct net *net, .tuple = tuple, .zone = zone }; - struct rhlist_head *hl; + struct rhlist_head *hl, *h; hl = rhltable_lookup(&nf_nat_bysource_table, &key, nf_nat_bysource_params); - if (!hl) - return 0; - ct = container_of(hl, typeof(*ct), nat_bysource); + rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) { + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; + if (in_range(l3proto, l4proto, result, range)) + return 1; + } - return in_range(l3proto, l4proto, result, range); + return 0; } /* For [FUTURE] fragmentation handling, we want the least-used -- cgit v1.2.3 From 974292defee033bc43ccfcb2fcefc3eba3905340 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jul 2017 13:29:03 +0200 Subject: netfilter: nf_tables: only allow in/output for arp packets arp packets cannot be forwarded. They can be bridged, but then they can be filtered using either ebtables or nftables bridge family. The bridge netfilter exposes a "call-arptables" switch which pushes packets into arptables, but lets not expose this for nftables, so better close this asap. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_tables_arp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 805c8ddfe860..4bbc273b45e8 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -72,8 +72,7 @@ static const struct nf_chain_type filter_arp = { .family = NFPROTO_ARP, .owner = THIS_MODULE, .hook_mask = (1 << NF_ARP_IN) | - (1 << NF_ARP_OUT) | - (1 << NF_ARP_FORWARD), + (1 << NF_ARP_OUT), }; static int __init nf_tables_arp_init(void) -- cgit v1.2.3 From 36ac344e16e04e3e55e8fed7446095a6458c64e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 10 Jul 2017 13:53:53 +0200 Subject: netfilter: expect: fix crash when putting uninited expectation We crash in __nf_ct_expect_check, it calls nf_ct_remove_expect on the uninitialised expectation instead of existing one, so del_timer chokes on random memory address. Fixes: ec0e3f01114ad32711243 ("netfilter: nf_ct_expect: Add nf_ct_remove_expect()") Reported-by: Sergey Kvachonok Tested-by: Sergey Kvachonok Cc: Gao Feng Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e03d16ed550d..899c2c36da13 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -422,7 +422,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { - if (nf_ct_remove_expect(expect)) + if (nf_ct_remove_expect(i)) break; } else if (expect_clash(i, expect)) { ret = -EBUSY; -- cgit v1.2.3 From 13c401f33e19c20431d9888a91d9ea82e5133bd9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 14 Jul 2017 23:03:49 -0700 Subject: jhash: fix -Wimplicit-fallthrough warnings GCC 7 added a new -Wimplicit-fallthrough warning. It's only enabled with W=1, but since linux/jhash.h is included in over hundred places (including other global headers) it seems worthwhile fixing this warning. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/jhash.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 348c6f47e4cc..8037850f3104 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -85,19 +85,18 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) k += 12; } /* Last block: affect all 32 bits of (c) */ - /* All the case statements fall through */ switch (length) { - case 12: c += (u32)k[11]<<24; - case 11: c += (u32)k[10]<<16; - case 10: c += (u32)k[9]<<8; - case 9: c += k[8]; - case 8: b += (u32)k[7]<<24; - case 7: b += (u32)k[6]<<16; - case 6: b += (u32)k[5]<<8; - case 5: b += k[4]; - case 4: a += (u32)k[3]<<24; - case 3: a += (u32)k[2]<<16; - case 2: a += (u32)k[1]<<8; + case 12: c += (u32)k[11]<<24; /* fall through */ + case 11: c += (u32)k[10]<<16; /* fall through */ + case 10: c += (u32)k[9]<<8; /* fall through */ + case 9: c += k[8]; /* fall through */ + case 8: b += (u32)k[7]<<24; /* fall through */ + case 7: b += (u32)k[6]<<16; /* fall through */ + case 6: b += (u32)k[5]<<8; /* fall through */ + case 5: b += k[4]; /* fall through */ + case 4: a += (u32)k[3]<<24; /* fall through */ + case 3: a += (u32)k[2]<<16; /* fall through */ + case 2: a += (u32)k[1]<<8; /* fall through */ case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ @@ -131,10 +130,10 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) k += 3; } - /* Handle the last 3 u32's: all the case statements fall through */ + /* Handle the last 3 u32's */ switch (length) { - case 3: c += k[2]; - case 2: b += k[1]; + case 3: c += k[2]; /* fall through */ + case 2: b += k[1]; /* fall through */ case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ -- cgit v1.2.3 From df39a9f106d53532443a804352894480ca6ca5fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 17 Jul 2017 11:42:55 -0700 Subject: bpf: check NULL for sk_to_full_sk() return value When req->rsk_listener is NULL, sk_to_full_sk() returns NULL too, so we have to check its return value against NULL here. Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops") Reported-by: David Ahern Tested-by: David Ahern Cc: Lawrence Brakmo Cc: Daniel Borkmann Signed-off-by: Cong Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 360c082e885c..d41d40ac3efd 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,7 +85,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled && (sock_ops)->sk) { \ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ - if (sk_fullsock(__sk)) \ + if (__sk && sk_fullsock(__sk)) \ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ sock_ops, \ BPF_CGROUP_SOCK_OPS); \ -- cgit v1.2.3 From e5dadc65f9e0177eb649bcd9d333f1ebf871223e Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 17 Jul 2017 18:34:42 +0800 Subject: ppp: Fix false xmit recursion detect with two ppp devices The global percpu variable ppp_xmit_recursion is used to detect the ppp xmit recursion to avoid the deadlock, which is caused by one CPU tries to lock the xmit lock twice. But it would report false recursion when one CPU wants to send the skb from two different PPP devices, like one L2TP on the PPPoE. It is a normal case actually. Now use one percpu member of struct ppp instead of the gloable variable to detect the xmit recursion of one ppp device. Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit") Signed-off-by: Gao Feng Signed-off-by: Liu Jianying Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 13028833bee3..bd4303944e44 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -120,6 +120,7 @@ struct ppp { int n_channels; /* how many channels are attached 54 */ spinlock_t rlock; /* lock for receive side 58 */ spinlock_t wlock; /* lock for transmit side 5c */ + int *xmit_recursion __percpu; /* xmit recursion detect */ int mru; /* max receive unit 60 */ unsigned int flags; /* control bits 64 */ unsigned int xstate; /* transmit state bits 68 */ @@ -1025,6 +1026,7 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev, struct ppp *ppp = netdev_priv(dev); int indx; int err; + int cpu; ppp->dev = dev; ppp->ppp_net = src_net; @@ -1039,6 +1041,15 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev, INIT_LIST_HEAD(&ppp->channels); spin_lock_init(&ppp->rlock); spin_lock_init(&ppp->wlock); + + ppp->xmit_recursion = alloc_percpu(int); + if (!ppp->xmit_recursion) { + err = -ENOMEM; + goto err1; + } + for_each_possible_cpu(cpu) + (*per_cpu_ptr(ppp->xmit_recursion, cpu)) = 0; + #ifdef CONFIG_PPP_MULTILINK ppp->minseq = -1; skb_queue_head_init(&ppp->mrq); @@ -1050,11 +1061,15 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev, err = ppp_unit_register(ppp, conf->unit, conf->ifname_is_set); if (err < 0) - return err; + goto err2; conf->file->private_data = &ppp->file; return 0; +err2: + free_percpu(ppp->xmit_recursion); +err1: + return err; } static const struct nla_policy ppp_nl_policy[IFLA_PPP_MAX + 1] = { @@ -1400,18 +1415,16 @@ static void __ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } -static DEFINE_PER_CPU(int, ppp_xmit_recursion); - static void ppp_xmit_process(struct ppp *ppp) { local_bh_disable(); - if (unlikely(__this_cpu_read(ppp_xmit_recursion))) + if (unlikely(*this_cpu_ptr(ppp->xmit_recursion))) goto err; - __this_cpu_inc(ppp_xmit_recursion); + (*this_cpu_ptr(ppp->xmit_recursion))++; __ppp_xmit_process(ppp); - __this_cpu_dec(ppp_xmit_recursion); + (*this_cpu_ptr(ppp->xmit_recursion))--; local_bh_enable(); @@ -1905,7 +1918,7 @@ static void __ppp_channel_push(struct channel *pch) read_lock(&pch->upl); ppp = pch->ppp; if (ppp) - __ppp_xmit_process(ppp); + ppp_xmit_process(ppp); read_unlock(&pch->upl); } } @@ -1914,9 +1927,7 @@ static void ppp_channel_push(struct channel *pch) { local_bh_disable(); - __this_cpu_inc(ppp_xmit_recursion); __ppp_channel_push(pch); - __this_cpu_dec(ppp_xmit_recursion); local_bh_enable(); } @@ -3057,6 +3068,7 @@ static void ppp_destroy_interface(struct ppp *ppp) #endif /* CONFIG_PPP_FILTER */ kfree_skb(ppp->xmit_pending); + free_percpu(ppp->xmit_recursion); free_netdev(ppp->dev); } -- cgit v1.2.3 From 18bcf2907df935981266532e1e0d052aff2e6fae Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 17 Jul 2017 12:35:58 +0200 Subject: ipv4: ipv6: initialize treq->txhash in cookie_v[46]_check() KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(), which originated from the TCP request socket created in cookie_v6_check(): ================================================================== BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0 CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies. Check SNMP counters. Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 skb_set_hash_from_sk ./include/net/sock.h:2011 tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983 tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493 tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284 tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309 call_timer_fn+0x240/0x520 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 __run_timers+0xc13/0xf10 kernel/time/timer.c:1601 run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614 __do_softirq+0x485/0x942 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 irq_exit+0x1fa/0x230 kernel/softirq.c:405 exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657 smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966 apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489 RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36 RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77 RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440 RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005 RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770 RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004 R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810 R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4 poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293 SYSC_select+0x4b4/0x4e0 fs/select.c:653 SyS_select+0x76/0xa0 fs/select.c:634 entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204 RIP: 0033:0x4597e7 RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20 R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003 chained origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_save_stack mm/kmsan/kmsan.c:317 kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259 tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472 tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103 tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212 cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245 tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 reqsk_alloc ./include/net/request_sock.h:87 inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200 cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169 tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 ================================================================== Similar error is reported for cookie_v4_check(). Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets") Signed-off-by: Alexander Potapenko Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 1 + net/ipv6/syncookies.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0905cf04c2a4..03ad8778c395 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -335,6 +335,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = 0; + treq->txhash = net_tx_rndhash(); req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7b75b0620730..4e7817abc0b9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -216,6 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = 0; + treq->txhash = net_tx_rndhash(); /* * We need to lookup the dst_entry to get the correct window size. -- cgit v1.2.3 From 799f917233f6ed242ee9416bf80b14819f0c97f3 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 17 Jul 2017 19:42:41 +0200 Subject: atm: zatm: Fix an error handling path in 'zatm_init_one()' If 'dma_set_mask_and_coherent()' fails, we must undo the previous 'pci_request_regions()' call. Adjust corresponding 'goto' to jump at the right place of the error handling path. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 292dec18ffb8..07bdd51b3b9a 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1613,7 +1613,7 @@ static int zatm_init_one(struct pci_dev *pci_dev, ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); if (ret < 0) - goto out_disable; + goto out_release; zatm_dev->pci_dev = pci_dev; dev->dev_data = zatm_dev; -- cgit v1.2.3 From 0ddf3fb2c43d2e65aee5de158ed694ea11ef229d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 18 Jul 2017 11:57:55 +0200 Subject: udp: preserve skb->dst if required for IP options processing Eric noticed that in udp_recvmsg() we still need to access skb->dst while processing the IP options. Since commit 0a463c78d25b ("udp: avoid a cache miss on dequeue") skb->dst is no more available at recvmsg() time and bad things will happen if we enter the relevant code path. This commit address the issue, avoid clearing skb->dst if any IP options are present into the relevant skb. Since the IP CB is contained in the first skb cacheline, we can test it to decide to leverage the consume_stateless_skb() optimization, without measurable additional cost in the faster path. v1 -> v2: updated commit message tags Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue") Reported-by: Andrey Konovalov Reported-by: Eric Dumazet Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 25294d43e147..b057653ceca9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1388,6 +1388,11 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } + /* we cleared the head states previously only if the skb lacks any IP + * options, see __udp_queue_rcv_skb(). + */ + if (unlikely(IPCB(skb)->opt.optlen > 0)) + skb_release_head_state(skb); consume_stateless_skb(skb); } EXPORT_SYMBOL_GPL(skb_consume_udp); @@ -1779,8 +1784,12 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* clear all pending head states while they are hot in the cache */ - skb_release_head_state(skb); + /* At recvmsg() time we need skb->dst to process IP options-related + * cmsg, elsewhere can we clear all pending head states while they are + * hot in the cache + */ + if (likely(IPCB(skb)->opt.optlen == 0)) + skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { -- cgit v1.2.3 From 073dd5ad34b1d3aaadaa7e5e8cbe576d9545f163 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 18 Jul 2017 22:38:56 +0300 Subject: netfilter: fix netfilter_net_init() return We accidentally return an uninitialized variable. Fixes: cf56c2f892a8 ("netfilter: remove old pre-netns era hook api") Signed-off-by: Dan Carpenter Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 368610dbc3c0..974cf2a3795a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -344,7 +344,7 @@ EXPORT_SYMBOL(nf_nat_decode_session_hook); static int __net_init netfilter_net_init(struct net *net) { - int i, h, ret; + int i, h; for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) @@ -362,7 +362,7 @@ static int __net_init netfilter_net_init(struct net *net) } #endif - return ret; + return 0; } static void __net_exit netfilter_net_exit(struct net *net) -- cgit v1.2.3 From 98de4e0ea47d106846fc0e30ce4e644283fa7fc2 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander" Date: Tue, 18 Jul 2017 04:23:16 +0000 Subject: wireless: wext: terminate ifr name coming from userspace ifr name is assumed to be a valid string by the kernel, but nothing was forcing username to pass a valid string. In turn, this would cause panics as we tried to access the string past it's valid memory. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 82fd4c9c4a1b..7657ad6bc13d 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -424,6 +424,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (copy_from_user(&iwr, arg, sizeof(iwr))) return -EFAULT; + iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + return wext_handle_ioctl(net, &iwr, cmd, arg); } -- cgit v1.2.3 From 63679112c536289826fec61c917621de95ba2ade Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Jul 2017 13:33:24 -0700 Subject: net: Zero terminate ifr_name in dev_ifname(). The ifr.ifr_name is passed around and assumed to be NULL terminated. Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7657ad6bc13d..06b147d7d9e2 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; + ifr.ifr_name[IFNAMSIZ-1] = 0; error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); if (error) -- cgit v1.2.3 From 90f522a20e3d16d153e5a5f84cf4ff92281ee417 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 18 Jul 2017 17:07:15 +0300 Subject: NET: dwmac: Make dwmac reset unconditional Unconditional reset dwmac before HW init if reset controller is present. In existing implementation we reset dwmac only after second module probing: (module load -> unload -> load again [reset happens]) Now we reset dwmac at every module load: (module load [reset happens] -> unload -> load again [reset happens]) Also some reset controllers have only reset callback instead of assert + deassert callbacks pair, so handle this case. Signed-off-by: Eugeniy Paltsev Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1853f7ff6657..1763e48c84e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4120,8 +4120,15 @@ int stmmac_dvr_probe(struct device *device, if ((phyaddr >= 0) && (phyaddr <= 31)) priv->plat->phy_addr = phyaddr; - if (priv->plat->stmmac_rst) + if (priv->plat->stmmac_rst) { + ret = reset_control_assert(priv->plat->stmmac_rst); reset_control_deassert(priv->plat->stmmac_rst); + /* Some reset controllers have only reset callback instead of + * assert + deassert callbacks pair. + */ + if (ret == -ENOTSUPP) + reset_control_reset(priv->plat->stmmac_rst); + } /* Init MAC and get the capabilities */ ret = stmmac_hw_init(priv); -- cgit v1.2.3 From 65e3c766359992f6cbe3fa1ef0a19546ecbdf380 Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Thu, 6 Jul 2017 10:37:57 -0700 Subject: dt-binding: ptp: Add SoC compatibility strings for dte ptp clock Add SoC specific compatibility strings to the Broadcom DTE based PTP clock binding document. Fixed the document heading and node name. Fixes: 80d6076140b2 ("dt-binding: ptp: add bindings document for dte based ptp clock") Signed-off-by: Arun Parameswaran Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt b/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt index 07590bcdad15..7c04e22a5d6a 100644 --- a/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt +++ b/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt @@ -1,13 +1,20 @@ -* Broadcom Digital Timing Engine(DTE) based PTP clock driver +* Broadcom Digital Timing Engine(DTE) based PTP clock Required properties: -- compatible: should be "brcm,ptp-dte" +- compatible: should contain the core compatibility string + and the SoC compatibility string. The SoC + compatibility string is to handle SoC specific + hardware differences. + Core compatibility string: + "brcm,ptp-dte" + SoC compatibility strings: + "brcm,iproc-ptp-dte" - for iproc based SoC's - reg: address and length of the DTE block's NCO registers Example: -ptp_dte: ptp_dte@180af650 { - compatible = "brcm,ptp-dte"; +ptp: ptp-dte@180af650 { + compatible = "brcm,iproc-ptp-dte", "brcm,ptp-dte"; reg = <0x180af650 0x10>; status = "okay"; }; -- cgit v1.2.3 From bb0a2675f72b458e64f47071e8aabdb225a6af4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Wed, 19 Jul 2017 08:17:02 +0200 Subject: net: dsa: mv88e6xxx: Enable CMODE config support for 6390X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 & 10') added support for setting the CMODE for the 6390X family, but only enabled it for 9290 and 6390 - and left out 6390X. Fix support for setting the CMODE on 6390X also by assigning mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in mv88e6390x_ops too. Fixes: f39908d3b1c4 ("net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 & 10") Signed-off-by: Martin Hundebøll Reviewed-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 53b088166c28..5bcdd33101b0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3178,6 +3178,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6390_port_pause_limit, + .port_set_cmode = mv88e6390x_port_set_cmode, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .stats_snapshot = mv88e6390_g1_stats_snapshot, -- cgit v1.2.3 From 3753654e541938717b13f2b25791c3171a3a06aa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 19 Jul 2017 10:22:40 -0700 Subject: Revert "rtnetlink: Do not generate notifications for CHANGEADDR event" This reverts commit cd8966e75ed3c6b41a37047a904617bc44fa481f. The duplicate CHANGEADDR event message is sent regardless of link status whereas the setlink changes only generate a notification when the link is up. Not sending a notification when the link is down breaks dhcpcd which only processes hwaddr changes when the link is down. Fixes reported regression: https://bugzilla.kernel.org/show_bug.cgi?id=196355 Reported-by: Yaroslav Isakov Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d1ba90980be1..11b25fbf3dd2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4241,6 +4241,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi switch (event) { case NETDEV_REBOOT: + case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: case NETDEV_BONDING_FAILOVER: -- cgit v1.2.3 From 1e6c22aef28364dcc5f03c04a05ec463bc2b3431 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Jul 2017 18:46:59 +0100 Subject: net: tehuti: don't process data if it has not been copied from userspace The array data is only populated with valid information from userspace if cmd != SIOCDEVPRIVATE, other cases the array contains garbage on the stack. The subsequent switch statement acts on a subcommand in data[0] which could be any garbage value if cmd is SIOCDEVPRIVATE which seems incorrect to me. Instead, just return EOPNOTSUPP for the case where cmd == SIOCDEVPRIVATE to avoid this issue. As a side note, I suspect that the original intention of the code was for this ioctl to work just for cmd == SIOCDEVPRIVATE (and the current logic is reversed). However, I don't wont to change the current semantics in case any userspace code relies on this existing behaviour. Detected by CoverityScan, CID#139647 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/tehuti/tehuti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 711fbbbc4b1f..163d8d16bc24 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -654,6 +654,8 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) RET(-EFAULT); } DBG("%d 0x%x 0x%x\n", data[0], data[1], data[2]); + } else { + return -EOPNOTSUPP; } if (!capable(CAP_SYS_RAWIO)) -- cgit v1.2.3 From 6399f1fae4ec29fab5ec76070435555e256ca3a6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 19 Jul 2017 22:28:55 +0200 Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt In some cases, offset can overflow and can cause an infinite loop in ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and cap it at IPV6_MAXPLEN, since packets larger than that should be invalid. This problem has been here since before the beginning of git history. Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/output_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index e9065b8d3af8..abb2c307fbe8 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { - u16 offset = sizeof(struct ipv6hdr); + unsigned int offset = sizeof(struct ipv6hdr); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; @@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; + unsigned int len; switch (**nexthdr) { @@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - offset += ipv6_optlen(exthdr); + len = ipv6_optlen(exthdr); + if (len + offset >= IPV6_MAXPLEN) + return -EINVAL; + offset += len; *nexthdr = &exthdr->nexthdr; } -- cgit v1.2.3 From 4cabc5b186b5427b9ee5a7495172542af105f02b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:21 +0200 Subject: bpf: fix mixed signed/unsigned derived min/max value bounds Edward reported that there's an issue in min/max value bounds tracking when signed and unsigned compares both provide hints on limits when having unknown variables. E.g. a program such as the following should have been rejected: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff8a94cda93400 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 11: (65) if r1 s> 0x1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 14: (b7) r0 = 0 15: (95) exit What happens is that in the first part ... 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 ... r1 carries an unsigned value, and is compared as unsigned against a register carrying an immediate. Verifier deduces in reg_set_min_max() that since the compare is unsigned and operation is greater than (>), that in the fall-through/false case, r1's minimum bound must be 0 and maximum bound must be r2. Latter is larger than the bound and thus max value is reset back to being 'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now 'R1=inv,min_value=0'. The subsequent test ... 11: (65) if r1 s> 0x1 goto pc+2 ... is a signed compare of r1 with immediate value 1. Here, verifier deduces in reg_set_min_max() that since the compare is signed this time and operation is greater than (>), that in the fall-through/false case, we can deduce that r1's maximum bound must be 1, meaning with prior test, we result in r1 having the following state: R1=inv,min_value=0,max_value=1. Given that the actual value this holds is -8, the bounds are wrongly deduced. When this is being added to r0 which holds the map_value(_adj) type, then subsequent store access in above case will go through check_mem_access() which invokes check_map_access_adj(), that will then probe whether the map memory is in bounds based on the min_value and max_value as well as access size since the actual unknown value is min_value <= x <= max_value; commit fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, _adj} register types") provides some more explanation on the semantics. It's worth to note in this context that in the current code, min_value and max_value tracking are used for two things, i) dynamic map value access via check_map_access_adj() and since commit 06c1c049721a ("bpf: allow helpers access to variable memory") ii) also enforced at check_helper_mem_access() when passing a memory address (pointer to packet, map value, stack) and length pair to a helper and the length in this case is an unknown value defining an access range through min_value/max_value in that case. The min_value/max_value tracking is /not/ used in the direct packet access case to track ranges. However, the issue also affects case ii), for example, the following crafted program based on the same principle must be rejected as well: 0: (b7) r2 = 0 1: (bf) r3 = r10 2: (07) r3 += -512 3: (7a) *(u64 *)(r10 -16) = -8 4: (79) r4 = *(u64 *)(r10 -16) 5: (b7) r6 = -1 6: (2d) if r4 > r6 goto pc+5 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 7: (65) if r4 s> 0x1 goto pc+4 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 8: (07) r4 += 1 9: (b7) r5 = 0 10: (6a) *(u16 *)(r10 -512) = 0 11: (85) call bpf_skb_load_bytes#26 12: (b7) r0 = 0 13: (95) exit Meaning, while we initialize the max_value stack slot that the verifier thinks we access in the [1,2] range, in reality we pass -7 as length which is interpreted as u32 in the helper. Thus, this issue is relevant also for the case of helper ranges. Resetting both bounds in check_reg_overflow() in case only one of them exceeds limits is also not enough as similar test can be created that uses values which are within range, thus also here learned min value in r1 is incorrect when mixed with later signed test to create a range: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff880ad081fa00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (65) if r1 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 14: (b7) r0 = 0 15: (95) exit This leaves us with two options for fixing this: i) to invalidate all prior learned information once we switch signed context, ii) to track min/max signed and unsigned boundaries separately as done in [0]. (Given latter introduces major changes throughout the whole verifier, it's rather net-next material, thus this patch follows option i), meaning we can derive bounds either from only signed tests or only unsigned tests.) There is still the case of adjust_reg_min_max_vals(), where we adjust bounds on ALU operations, meaning programs like the following where boundaries on the reg get mixed in context later on when bounds are merged on the dst reg must get rejected, too: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff89b2bf87ce00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+6 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (b7) r7 = 1 12: (65) if r7 s> 0x0 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp 13: (b7) r0 = 0 14: (95) exit from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp 15: (0f) r7 += r1 16: (65) if r7 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 17: (0f) r0 += r7 18: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 19: (b7) r0 = 0 20: (95) exit Meaning, in adjust_reg_min_max_vals() we must also reset range values on the dst when src/dst registers have mixed signed/ unsigned derived min/max value bounds with one unbounded value as otherwise they can be added together deducing false boundaries. Once both boundaries are established from either ALU ops or compare operations w/o mixing signed/unsigned insns, then they can safely be added to other regs also having both boundaries established. Adding regs with one unbounded side to a map value where the bounded side has been learned w/o mixing ops is possible, but the resulting map value won't recover from that, meaning such op is considered invalid on the time of actual access. Invalid bounds are set on the dst reg in case i) src reg, or ii) in case dst reg already had them. The only way to recover would be to perform i) ALU ops but only 'add' is allowed on map value types or ii) comparisons, but these are disallowed on pointers in case they span a range. This is fine as only BPF_JEQ and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers which potentially turn them into PTR_TO_MAP_VALUE type depending on the branch, so only here min/max value cannot be invalidated for them. In terms of state pruning, value_from_signed is considered as well in states_equal() when dealing with adjusted map values. With regards to breaking existing programs, there is a small risk, but use-cases are rather quite narrow where this could occur and mixing compares probably unlikely. Joint work with Josef and Edward. [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html Fixes: 484611357c19 ("bpf: allow access into map value arrays") Reported-by: Edward Cree Signed-off-by: Daniel Borkmann Signed-off-by: Edward Cree Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 108 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 621076f56251..8e5d31f6faef 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -43,6 +43,7 @@ struct bpf_reg_state { u32 min_align; u32 aux_off; u32 aux_off_align; + bool value_from_signed; }; enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6a86723c5b64..af9e84a4944e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; + regs[regno].value_from_signed = false; regs[regno].min_align = 0; } @@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, return -EACCES; } -static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +static bool __is_pointer_value(bool allow_ptr_leaks, + const struct bpf_reg_state *reg) { - if (env->allow_ptr_leaks) + if (allow_ptr_leaks) return false; - switch (env->cur_state.regs[regno].type) { + switch (reg->type) { case UNKNOWN_VALUE: case CONST_IMM: return false; @@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +{ + return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); +} + static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, int off, int size, bool strict) { @@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_align = dst_reg->min_align; /* We don't know anything about what was done to this register, mark it - * as unknown. + * as unknown. Also, if both derived bounds came from signed/unsigned + * mixed compares and one side is unbounded, we cannot really do anything + * with them as boundaries cannot be trusted. Thus, arithmetic of two + * regs of such kind will get invalidated bounds on the dst side. */ - if (min_val == BPF_REGISTER_MIN_RANGE && - max_val == BPF_REGISTER_MAX_RANGE) { + if ((min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (BPF_SRC(insn->code) == BPF_X && + ((min_val != BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (min_val == BPF_REGISTER_MIN_RANGE && + max_val != BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && + dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && + dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && + regs[insn->dst_reg].value_from_signed != + regs[insn->src_reg].value_from_signed)) { reset_reg_range_values(regs, insn->dst_reg); return; } @@ -2023,6 +2044,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; regs[insn->dst_reg].min_align = calc_align(insn->imm); + regs[insn->dst_reg].value_from_signed = false; } } else if (opcode > BPF_END) { @@ -2198,40 +2220,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum val is val, * otherwise we know the min val is val+1. */ false_reg->max_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val + 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum value is val - 1, * otherwise we know the mimimum value is val. */ false_reg->max_value = val - 1; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2239,6 +2284,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg @@ -2248,41 +2299,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* * If this is false, then the val is <= the register, if it is * true the register <= to the val. */ false_reg->min_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val - 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* If this is false then constant < register, if it is true then * the register < constant. */ false_reg->min_value = val + 1; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2290,6 +2364,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, -- cgit v1.2.3 From d655490417ee22da3267fe6592a0ec2023c3c0db Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:22 +0200 Subject: bpf: allow to specify log level and reduce it for test_verifier For the test_verifier case, it's quite hard to parse log level 2 to figure out what's causing an issue when used to log level 1. We do want to use bpf_verify_program() in order to simulate some of the tests with strict alignment. So just add an argument to pass the level and put it to 1 for test_verifier. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/lib/bpf/bpf.c | 4 ++-- tools/lib/bpf/bpf.h | 2 +- tools/testing/selftests/bpf/test_align.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 7e0405e1651d..412a7c82995a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -120,7 +120,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, int strict_alignment, const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz) + char *log_buf, size_t log_buf_sz, int log_level) { union bpf_attr attr; @@ -131,7 +131,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.license = ptr_to_u64(license); attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; - attr.log_level = 2; + attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 16de44a14b48..418c86e69bcb 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -38,7 +38,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, int strict_alignment, const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz); + char *log_buf, size_t log_buf_sz, int log_level); int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index bccebd935907..29793694cbc7 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -380,7 +380,7 @@ static int do_test_single(struct bpf_align_test *test) prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, prog, prog_len, 1, "GPL", 0, - bpf_vlog, sizeof(bpf_vlog)); + bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 404aec520812..f4d0a1de3925 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5633,7 +5633,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, - "GPL", 0, bpf_vlog, sizeof(bpf_vlog)); + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; -- cgit v1.2.3 From a1502132866fd2d2705eef4041dd6d7d849f48a2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:23 +0200 Subject: bpf: fix up test cases with mixed signed/unsigned bounds Fix the few existing test cases that used mixed signed/unsigned bounds and switch them only to one flavor. Reason why we need this is that proper boundaries cannot be derived from mixed tests. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f4d0a1de3925..64b39d37d91d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4969,7 +4969,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val), 4), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -4995,7 +4995,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -5023,7 +5023,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 20, 4), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -5050,7 +5050,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4), BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_MOV64_IMM(BPF_REG_0, 0), -- cgit v1.2.3 From b712296a41ce0a114895fdff68fc22aada165b07 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 21 Jul 2017 00:00:24 +0200 Subject: bpf: add test for mixed signed and unsigned bounds checks These failed due to a bug in verifier bounds handling. Signed-off-by: Edward Cree Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 52 +++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 64b39d37d91d..48b7997c0ae7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5510,6 +5510,58 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .prog_type = BPF_PROG_TYPE_LWT_IN, }, + { + "bounds checks mixing signed and unsigned, positive bounds", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 2), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 8641250251bfcd93479c71783c6792ae3325d7e4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:25 +0200 Subject: bpf: more tests for mixed signed and unsigned bounds checks Add a couple of more test cases to BPF selftests that are related to mixed signed and unsigned checks. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 418 ++++++++++++++++++++++++++++ 1 file changed, 418 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 48b7997c0ae7..af7d173910f4 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5562,6 +5562,424 @@ static struct bpf_test tests[] = { .result = REJECT, .result_unpriv = REJECT, }, + { + "bounds checks mixing signed and unsigned, variant 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R8 invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R8 invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 5", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 invalid mem access", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_6, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R4 min value is negative, either use unsigned", + .errstr = "R4 min value is negative, either use unsigned", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 7", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 8", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024 + 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 9", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 10", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 11", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 12", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + /* Dead branch. */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 13", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -6), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 14", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 2), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 15", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_8, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6), + BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3), + BPF_JMP_IMM(BPF_JA, 0, 0, -7), + }, + .fixup_map1 = { 4 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "bounds checks mixing signed and unsigned, variant 16", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -6), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 min value is negative", + .result = REJECT, + .result_unpriv = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From be35e8c516c1915a3035d266a2015b41f73ba3f9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Jul 2017 12:25:22 -0700 Subject: net: dsa: b53: Add missing ARL entries for BCM53125 The BCM53125 entry was missing an arl_entries member which would basically prevent the ARL search from terminating properly. This switch has 4 ARL entries, so add that. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index e68d368e20ac..7f36d3e3c98b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1665,6 +1665,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .dev_name = "BCM53125", .vlans = 4096, .enabled_ports = 0xff, + .arl_entries = 4, .cpu_port = B53_CPU_PORT, .vta_regs = B53_VTA_REGS, .duplex_reg = B53_DUPLEX_STAT_GE, -- cgit v1.2.3 From 153711f9421be5dbc973dc57a4109dc9d54c89b1 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 20 Jul 2017 11:27:57 -0700 Subject: rtnetlink: allocate more memory for dev_set_mac_address() virtnet_set_mac_address() interprets mac address as struct sockaddr, but upper layer only allocates dev->addr_len which is ETH_ALEN + sizeof(sa_family_t) in this case. We lack a unified definition for mac address, so just fix the upper layer, this also allows drivers to interpret it to struct sockaddr freely. Reported-by: David Ahern Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 11b25fbf3dd2..9201e3621351 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2031,7 +2031,8 @@ static int do_setlink(const struct sk_buff *skb, struct sockaddr *sa; int len; - len = sizeof(sa_family_t) + dev->addr_len; + len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, + sizeof(*sa)); sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; -- cgit v1.2.3 From 8799a221f5944a7d74516ecf46d58c28ec1d1f75 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 19 Jul 2017 15:41:33 -0700 Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call. Net stack initialization currently initializes fib-trie after the first call to netdevice_notifier() call. In fact fib_trie initialization needs to happen before first rtnl_register(). It does not cause any problem since there are no devices UP at this moment, but trying to bring 'lo' UP at initialization would make this assumption wrong and exposes the issue. Fixes following crash Call Trace: ? alternate_node_alloc+0x76/0xa0 fib_table_insert+0x1b7/0x4b0 fib_magic.isra.17+0xea/0x120 fib_add_ifaddr+0x7b/0x190 fib_netdev_event+0xc0/0x130 register_netdevice_notifier+0x1c1/0x1d0 ip_fib_init+0x72/0x85 ip_rt_init+0x187/0x1e9 ip_init+0xe/0x1a inet_init+0x171/0x26c ? ipv4_offload_init+0x66/0x66 do_one_initcall+0x43/0x160 kernel_init_freeable+0x191/0x219 ? rest_init+0x80/0x80 kernel_init+0xe/0x150 ret_from_fork+0x22/0x30 Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08 RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28 CR2: 0000000000000014 Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.") Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization") Signed-off-by: Mahesh Bandewar Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e678fa892dd..044d2a159a3c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + fib_trie_init(); register_pernet_subsys(&fib_net_ops); + register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_trie_init(); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); } -- cgit v1.2.3 From 070f9c658a59f9a736b1c040001d37b0952e778e Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 20 Jul 2017 16:59:52 +0530 Subject: net: ethernet: ti: cpsw: Push the request_irq function to the end of probe Push the request_irq function to the end of probe so as to ensure all the required fields are populated in the event of an ISR getting executed right after requesting the irq. Currently while loading the crash kernel a crash was seen as soon as devm_request_threaded_irq was called. This was due to n->poll being NULL which is called as part of net_rx_action function. Suggested-by: Sekhar Nori Signed-off-by: Keerthy Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 49 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1850e348f555..badd0a8caeb9 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -3089,6 +3089,31 @@ static int cpsw_probe(struct platform_device *pdev) cpsw->quirk_irq = true; } + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + ndev->netdev_ops = &cpsw_netdev_ops; + ndev->ethtool_ops = &cpsw_ethtool_ops; + netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); + netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); + cpsw_split_res(ndev); + + /* register the network device */ + SET_NETDEV_DEV(ndev, &pdev->dev); + ret = register_netdev(ndev); + if (ret) { + dev_err(priv->dev, "error registering net device\n"); + ret = -ENODEV; + goto clean_ale_ret; + } + + if (cpsw->data.dual_emac) { + ret = cpsw_probe_dual_emac(priv); + if (ret) { + cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); + goto clean_unregister_netdev_ret; + } + } + /* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and * MISC IRQs which are always kept disabled with this driver so * we will not request them. @@ -3127,33 +3152,9 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - - ndev->netdev_ops = &cpsw_netdev_ops; - ndev->ethtool_ops = &cpsw_ethtool_ops; - netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); - netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); - cpsw_split_res(ndev); - - /* register the network device */ - SET_NETDEV_DEV(ndev, &pdev->dev); - ret = register_netdev(ndev); - if (ret) { - dev_err(priv->dev, "error registering net device\n"); - ret = -ENODEV; - goto clean_ale_ret; - } - cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d, pool size %d)\n", &ss_res->start, ndev->irq, dma_params.descs_pool_size); - if (cpsw->data.dual_emac) { - ret = cpsw_probe_dual_emac(priv); - if (ret) { - cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); - goto clean_unregister_netdev_ret; - } - } pm_runtime_put(&pdev->dev); -- cgit v1.2.3 From e623a48ee433985f6ca0fb238f0002cc2eccdf53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Thu, 20 Jul 2017 12:28:55 +0200 Subject: rds: Make sure updates to cp_send_gen can be observed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cp->cp_send_gen is treated as a normal variable, although it may be used by different threads. This is fixed by using {READ,WRITE}_ONCE when it is incremented and READ_ONCE when it is read outside the {acquire,release}_in_xmit protection. Normative reference from the Linux-Kernel Memory Model: Loads from and stores to shared (but non-atomic) variables should be protected with the READ_ONCE(), WRITE_ONCE(), and ACCESS_ONCE(). Clause 5.1.2.4/25 in the C standard is also relevant. Signed-off-by: Håkon Bugge Reviewed-by: Knut Omang Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/send.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rds/send.c b/net/rds/send.c index e81aa176f4e2..41b9f0f5bb9c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -170,8 +170,8 @@ restart: * The acquire_in_xmit() check above ensures that only one * caller can increment c_send_gen at any time. */ - cp->cp_send_gen++; - send_gen = cp->cp_send_gen; + send_gen = READ_ONCE(cp->cp_send_gen) + 1; + WRITE_ONCE(cp->cp_send_gen, send_gen); /* * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT, @@ -431,7 +431,7 @@ over_batch: smp_mb(); if ((test_bit(0, &conn->c_map_queued) || !list_empty(&cp->cp_send_queue)) && - send_gen == cp->cp_send_gen) { + send_gen == READ_ONCE(cp->cp_send_gen)) { rds_stats_inc(s_send_lock_queue_raced); if (batch_count < send_batch_count) goto restart; -- cgit v1.2.3 From cbf5ecb305601d063dc94a57680dfbc3f96c188d Mon Sep 17 00:00:00 2001 From: Kosuke Tatsukawa Date: Thu, 20 Jul 2017 05:20:40 +0000 Subject: net: bonding: Fix transmit load balancing in balance-alb mode balance-alb mode used to have transmit dynamic load balancing feature enabled by default. However, transmit dynamic load balancing no longer works in balance-alb after commit 8b426dc54cf4 ("bonding: remove hardcoded value"). Both balance-tlb and balance-alb use the function bond_do_alb_xmit() to send packets. This function uses the parameter tlb_dynamic_lb. tlb_dynamic_lb used to have the default value of 1 for balance-alb, but now the value is set to 0 except in balance-tlb. Re-enable transmit dyanmic load balancing by initializing tlb_dynamic_lb for balance-alb similar to balance-tlb. Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value") Signed-off-by: Kosuke Tatsukawa Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 14ff622190a5..181839d6fbea 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4596,7 +4596,7 @@ static int bond_check_params(struct bond_params *params) } ad_user_port_key = valptr->value; - if (bond_mode == BOND_MODE_TLB) { + if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) { bond_opt_initstr(&newval, "default"); valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval); -- cgit v1.2.3