summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/meson-dwmac.txt1
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan78xx.txt54
-rw-r--r--Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt19
-rw-r--r--Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt9
-rw-r--r--Documentation/filesystems/nfs/nfsroot.txt70
-rw-r--r--Documentation/networking/ip-sysctl.txt13
-rw-r--r--Documentation/networking/netdev-features.txt7
-rw-r--r--MAINTAINERS22
-rw-r--r--drivers/connector/cn_proc.c4
-rw-r--r--drivers/dca/dca-core.c2
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c21
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h2
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c2
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/dsa/b53/b53_common.c81
-rw-r--r--drivers/net/dsa/b53/b53_priv.h6
-rw-r--r--drivers/net/dsa/bcm_sf2.c1
-rw-r--r--drivers/net/dsa/dsa_loop.c12
-rw-r--r--drivers/net/dsa/lan9303-core.c11
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c11
-rw-r--r--drivers/net/dsa/mt7530.c11
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c63
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h4
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c62
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.h25
-rw-r--r--drivers/net/dsa/qca8k.c10
-rw-r--r--drivers/net/ethernet/8390/Kconfig17
-rw-r--r--drivers/net/ethernet/8390/Makefile1
-rw-r--r--drivers/net/ethernet/8390/ax88796.c228
-rw-r--r--drivers/net/ethernet/8390/xsurf100.c382
-rw-r--r--drivers/net/ethernet/amd/amd8111e.c16
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/Makefile1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c147
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h9
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c166
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c124
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h23
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c40
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c19
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h17
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c6
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c60
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h14
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_core.c314
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_ethtool.c532
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c350
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c254
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c15
-rw-r--r--drivers/net/ethernet/cavium/liquidio/liquidio_common.h81
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c12
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_iq.h4
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c52
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h7
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h62
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/srq.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h12
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c16
-rw-r--r--drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h15
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_port.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c19
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c167
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h20
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c5
-rw-r--r--drivers/net/ethernet/huawei/hinic/Kconfig2
-rw-r--r--drivers/net/ethernet/intel/e100.c28
-rw-r--r--drivers/net/ethernet/intel/e1000/Makefile26
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000.h29
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_ethtool.c23
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_hw.c28
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_hw.h28
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_main.c28
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_osdep.h29
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_param.c28
-rw-r--r--drivers/net/ethernet/intel/e1000e/80003es2lan.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/80003es2lan.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/82571.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/82571.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/Makefile27
-rw-r--r--drivers/net/ethernet/intel/e1000e/defines.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/e1000.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/ethtool.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/mac.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/mac.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/manage.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/manage.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/nvm.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/nvm.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/param.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.h21
-rw-r--r--drivers/net/ethernet/intel/e1000e/ptp.c21
-rw-r--r--drivers/net/ethernet/intel/e1000e/regs.h21
-rw-r--r--drivers/net/ethernet/intel/fm10k/Makefile23
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_common.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_common.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_iov.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c27
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_mbx.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_mbx.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c32
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_tlv.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_tlv.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_type.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_vf.c20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_vf.h20
-rw-r--r--drivers/net/ethernet/intel/i40e/Makefile26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h33
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_alloc.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.c32
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c63
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.c117
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c37
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c34
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_devids.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_diag.c26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_diag.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c54
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_hmc.c27
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_hmc.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c160
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c27
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_osdep.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c71
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_status.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_trace.h23
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c65
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h29
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h34
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c111
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/Makefile26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq.c26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_alloc.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_common.c27
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_devids.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_hmc.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_osdep.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_prototype.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_register.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_status.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_trace.h23
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c30
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_type.h36
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_client.c6
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_client.h2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c33
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c51
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c37
-rw-r--r--drivers/net/ethernet/intel/igb/Makefile28
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_82575.c23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_82575.h23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_defines.h25
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_hw.h22
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_i210.c23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_i210.h23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_mac.c23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_mac.h23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_mbx.c23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_mbx.h23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_nvm.c22
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_nvm.h23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_phy.c23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_phy.h23
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_regs.h23
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h36
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c96
-rw-r--r--drivers/net/ethernet/intel/igb/igb_hwmon.c23
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c389
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c19
-rw-r--r--drivers/net/ethernet/intel/igbvf/Makefile28
-rw-r--r--drivers/net/ethernet/intel/igbvf/defines.h26
-rw-r--r--drivers/net/ethernet/intel/igbvf/ethtool.c26
-rw-r--r--drivers/net/ethernet/intel/igbvf/igbvf.h26
-rw-r--r--drivers/net/ethernet/intel/igbvf/mbx.c26
-rw-r--r--drivers/net/ethernet/intel/igbvf/mbx.h26
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c26
-rw-r--r--drivers/net/ethernet/intel/igbvf/regs.h26
-rw-r--r--drivers/net/ethernet/intel/igbvf/vf.c26
-rw-r--r--drivers/net/ethernet/intel/igbvf/vf.h26
-rw-r--r--drivers/net/ethernet/intel/ixgb/Makefile27
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb.h28
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_ee.c29
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_ee.h28
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c29
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_hw.c29
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_hw.h28
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_ids.h28
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_main.c29
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_osdep.h28
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_param.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/Makefile29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h32
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c30
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h27
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c373
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_model.h26
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c34
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h28
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c29
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h24
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c26
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/Makefile28
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/defines.h26
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c27
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h26
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c27
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/mbx.c27
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/mbx.h26
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/regs.h26
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/vf.c27
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/vf.h26
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c197
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h87
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c278
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c28
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c562
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/cmd.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c148
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c173
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c95
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h43
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c231
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h6
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h9
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/metadata.c20
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c50
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c5
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c45
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c59
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_debug.c118
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c19
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c9
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c18
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c247
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_vf.c29
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_vf.h21
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_filter.c7
-rw-r--r--drivers/net/ethernet/realtek/r8169.c1133
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c34
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c2
-rw-r--r--drivers/net/ethernet/sfc/efx.c36
-rw-r--r--drivers/net/ethernet/socionext/Kconfig2
-rw-r--r--drivers/net/ethernet/socionext/netsec.c27
-rw-r--r--drivers/net/ethernet/socionext/sni_ave.c252
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/chain_mode.c34
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h229
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c120
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c29
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c23
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c41
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.c19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.h6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.c220
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h438
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/norm_desc.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/ring_mode.c39
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c82
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c34
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c522
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c18
-rw-r--r--drivers/net/ethernet/ti/netcp.h3
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c32
-rw-r--r--drivers/net/ethernet/ti/netcp_ethss.c181
-rw-r--r--drivers/net/geneve.c72
-rw-r--r--drivers/net/hamradio/mkiss.c2
-rw-r--r--drivers/net/hyperv/Kconfig1
-rw-r--r--drivers/net/hyperv/hyperv_net.h164
-rw-r--r--drivers/net/hyperv/netvsc.c61
-rw-r--r--drivers/net/hyperv/rndis_filter.c28
-rw-r--r--drivers/net/macvlan.c68
-rw-r--r--drivers/net/phy/Kconfig6
-rw-r--r--drivers/net/phy/Makefile1
-rw-r--r--drivers/net/phy/asix.c63
-rw-r--r--drivers/net/phy/bcm-phy-lib.c6
-rw-r--r--drivers/net/phy/marvell.c5
-rw-r--r--drivers/net/phy/mdio-bitbang.c9
-rw-r--r--drivers/net/phy/mdio-boardinfo.c5
-rw-r--r--drivers/net/phy/mdio-gpio.c122
-rw-r--r--drivers/net/phy/micrel.c5
-rw-r--r--drivers/net/phy/microchip.c25
-rw-r--r--drivers/net/phy/smsc.c5
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/tun.c78
-rw-r--r--drivers/net/usb/Kconfig1
-rw-r--r--drivers/net/usb/lan78xx.c238
-rw-r--r--drivers/net/virtio_net.c74
-rw-r--r--drivers/net/vrf.c25
-rw-r--r--drivers/net/vxlan.c17
-rw-r--r--drivers/ptp/ptp_pch.c7
-rw-r--r--drivers/s390/net/lcs.c3
-rw-r--r--drivers/s390/net/qeth_core.h61
-rw-r--r--drivers/s390/net/qeth_core_main.c148
-rw-r--r--drivers/s390/net/qeth_core_mpc.h2
-rw-r--r--drivers/s390/net/qeth_core_sys.c2
-rw-r--r--drivers/s390/net/qeth_l2_main.c98
-rw-r--r--drivers/s390/net/qeth_l3_main.c207
-rw-r--r--drivers/soc/ti/knav_dma.c8
-rw-r--r--drivers/soc/ti/knav_qmss.h6
-rw-r--r--drivers/soc/ti/knav_qmss_queue.c98
-rw-r--r--drivers/vhost/net.c19
-rw-r--r--include/dt-bindings/net/microchip-lan78xx.h21
-rw-r--r--include/linux/bpf.h20
-rw-r--r--include/linux/btf.h48
-rw-r--r--include/linux/ethtool.h5
-rw-r--r--include/linux/filter.h24
-rw-r--r--include/linux/if_bridge.h28
-rw-r--r--include/linux/if_macvlan.h29
-rw-r--r--include/linux/if_tun.h4
-rw-r--r--include/linux/mdio-bitbang.h2
-rw-r--r--include/linux/mdio-gpio.h9
-rw-r--r--include/linux/microchipphy.h3
-rw-r--r--include/linux/mlx5/mlx5_ifc.h16
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h77
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/net_dim.h69
-rw-r--r--include/linux/netdev_features.h7
-rw-r--r--include/linux/netdevice.h42
-rw-r--r--include/linux/phy.h46
-rw-r--r--include/linux/platform_data/mdio-gpio.h33
-rw-r--r--include/linux/qed/qed_eth_if.h1
-rw-r--r--include/linux/qed/qed_if.h3
-rw-r--r--include/linux/rhashtable.h38
-rw-r--r--include/linux/skbuff.h8
-rw-r--r--include/linux/soc/ti/knav_dma.h12
-rw-r--r--include/linux/soc/ti/knav_qmss.h1
-rw-r--r--include/linux/tcp.h3
-rw-r--r--include/linux/udp.h3
-rw-r--r--include/net/addrconf.h14
-rw-r--r--include/net/ax88796.h14
-rw-r--r--include/net/dsa.h12
-rw-r--r--include/net/fib_rules.h3
-rw-r--r--include/net/if_inet6.h5
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/inet_sock.h1
-rw-r--r--include/net/ip.h6
-rw-r--r--include/net/ip6_fib.h180
-rw-r--r--include/net/ip6_route.h59
-rw-r--r--include/net/ip_tunnels.h11
-rw-r--r--include/net/ipv6.h4
-rw-r--r--include/net/neighbour.h19
-rw-r--r--include/net/netns/ipv6.h4
-rw-r--r--include/net/page_pool.h143
-rw-r--r--include/net/sctp/constants.h5
-rw-r--r--include/net/sctp/sctp.h52
-rw-r--r--include/net/sctp/sm.h4
-rw-r--r--include/net/sctp/structs.h6
-rw-r--r--include/net/sock.h21
-rw-r--r--include/net/switchdev.h1
-rw-r--r--include/net/tcp.h12
-rw-r--r--include/net/tls.h120
-rw-r--r--include/net/udp.h5
-rw-r--r--include/net/vxlan.h1
-rw-r--r--include/net/xdp.h83
-rw-r--r--include/trace/events/tcp.h69
-rw-r--r--include/uapi/linux/bpf.h1790
-rw-r--r--include/uapi/linux/btf.h128
-rw-r--r--include/uapi/linux/cn_proc.h4
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/pci_regs.h2
-rw-r--r--include/uapi/linux/snmp.h2
-rw-r--r--include/uapi/linux/tcp.h16
-rw-r--r--include/uapi/linux/tipc.h12
-rw-r--r--include/uapi/linux/tipc_config.h5
-rw-r--r--include/uapi/linux/tipc_netlink.h1
-rw-r--r--include/uapi/linux/udp.h1
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/arraymap.c50
-rw-r--r--kernel/bpf/btf.c2064
-rw-r--r--kernel/bpf/cpumap.c132
-rw-r--r--kernel/bpf/inode.c156
-rw-r--r--kernel/bpf/syscall.c52
-rw-r--r--kernel/bpf/verifier.c24
-rw-r--r--lib/kobject_uevent.c178
-rw-r--r--lib/rhashtable.c51
-rw-r--r--net/Kconfig6
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/bridge/br.c16
-rw-r--r--net/bridge/br_fdb.c69
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_if.c11
-rw-r--r--net/bridge/br_private.h19
-rw-r--r--net/bridge/br_switchdev.c12
-rw-r--r--net/bridge/br_vlan.c39
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c31
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/ethtool.c63
-rw-r--r--net/core/fib_rules.c495
-rw-r--r--net/core/filter.c100
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/page_pool.c317
-rw-r--r--net/core/rtnetlink.c8
-rw-r--r--net/core/skbuff.c25
-rw-r--r--net/core/sock.c5
-rw-r--r--net/core/xdp.c269
-rw-r--r--net/decnet/dn_rules.c7
-rw-r--r--net/dsa/master.c62
-rw-r--r--net/dsa/port.c90
-rw-r--r--net/dsa/slave.c10
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/fib_rules.c7
-rw-r--r--net/ipv4/fib_semantics.c43
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_output.c41
-rw-r--r--net/ipv4/ipconfig.c151
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/metrics.c53
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/tcp.c203
-rw-r--r--net/ipv4/tcp_input.c82
-rw-r--r--net/ipv4/tcp_output.c34
-rw-r--r--net/ipv4/udp.c85
-rw-r--r--net/ipv4/udp_offload.c67
-rw-r--r--net/ipv6/addrconf.c190
-rw-r--r--net/ipv6/af_inet6.c58
-rw-r--r--net/ipv6/anycast.c33
-rw-r--r--net/ipv6/exthdrs.c55
-rw-r--r--net/ipv6/fib6_rules.c7
-rw-r--r--net/ipv6/ip6_fib.c491
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/ip6_input.c2
-rw-r--r--net/ipv6/ip6_offload.c6
-rw-r--r--net/ipv6/ip6_output.c71
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/ndisc.c42
-rw-r--r--net/ipv6/reassembly.c25
-rw-r--r--net/ipv6/route.c1736
-rw-r--r--net/ipv6/seg6_iptunnel.c24
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/udp.c35
-rw-r--r--net/ipv6/udp_offload.c19
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/l2tp/l2tp_debugfs.c20
-rw-r--r--net/l2tp/l2tp_ppp.c21
-rw-r--r--net/ncsi/internal.h34
-rw-r--r--net/ncsi/ncsi-manage.c226
-rw-r--r--net/ncsi/ncsi-netlink.c20
-rw-r--r--net/ncsi/ncsi-rsp.c178
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c5
-rw-r--r--net/qrtr/Kconfig7
-rw-r--r--net/qrtr/Makefile2
-rw-r--r--net/qrtr/tun.c161
-rw-r--r--net/sched/act_csum.c6
-rw-r--r--net/sched/cls_flower.c275
-rw-r--r--net/sctp/associola.c85
-rw-r--r--net/sctp/chunk.c12
-rw-r--r--net/sctp/output.c28
-rw-r--r--net/sctp/outqueue.c48
-rw-r--r--net/sctp/sm_make_chunk.c143
-rw-r--r--net/sctp/socket.c43
-rw-r--r--net/sctp/transport.c37
-rw-r--r--net/smc/af_smc.c137
-rw-r--r--net/smc/smc.h4
-rw-r--r--net/smc/smc_cdc.c2
-rw-r--r--net/smc/smc_cdc.h2
-rw-r--r--net/smc/smc_core.c28
-rw-r--r--net/smc/smc_core.h4
-rw-r--r--net/smc/smc_diag.c39
-rw-r--r--net/smc/smc_llc.c62
-rw-r--r--net/smc/smc_llc.h3
-rw-r--r--net/smc/smc_rx.c2
-rw-r--r--net/smc/smc_rx.h1
-rw-r--r--net/smc/smc_tx.c24
-rw-r--r--net/smc/smc_wr.c1
-rw-r--r--net/tipc/bearer.c29
-rw-r--r--net/tipc/bearer.h3
-rw-r--r--net/tipc/node.c33
-rw-r--r--net/tipc/node.h3
-rw-r--r--net/tipc/socket.c13
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/tipc/udp_media.h14
-rw-r--r--net/tls/Kconfig10
-rw-r--r--net/tls/Makefile2
-rw-r--r--net/tls/tls_device.c764
-rw-r--r--net/tls/tls_device_fallback.c450
-rw-r--r--net/tls/tls_main.c143
-rw-r--r--net/tls/tls_sw.c141
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/bpf/bpf_load.c2
-rw-r--r--samples/bpf/sock_example.c4
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh319
-rw-r--r--samples/bpf/xdp_adjust_tail_kern.c152
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c142
-rw-r--r--samples/bpf/xdp_monitor_user.c2
-rw-r--r--samples/sockmap/Makefile78
-rwxr-xr-xsamples/sockmap/sockmap_test.sh488
-rwxr-xr-xscripts/bpf_helpers_doc.py421
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst11
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst29
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst3
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool14
-rw-r--r--tools/bpf/bpftool/cgroup.c15
-rw-r--r--tools/bpf/bpftool/map.c17
-rw-r--r--tools/bpf/bpftool/prog.c6
-rw-r--r--tools/include/uapi/linux/bpf.h1790
-rw-r--r--tools/include/uapi/linux/btf.h128
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c92
-rw-r--r--tools/lib/bpf/bpf.h17
-rw-r--r--tools/lib/bpf/btf.c374
-rw-r--r--tools/lib/bpf/btf.h22
-rw-r--r--tools/lib/bpf/libbpf.c160
-rw-r--r--tools/lib/bpf/libbpf.h7
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile32
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h7
-rw-r--r--tools/testing/selftests/bpf/test_adjust_tail.c30
-rw-r--r--tools/testing/selftests/bpf/test_btf.c1669
-rw-r--r--tools/testing/selftests/bpf/test_btf_haskv.c48
-rw-r--r--tools/testing/selftests/bpf/test_btf_nokv.c43
-rw-r--r--tools/testing/selftests/bpf/test_progs.c32
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c (renamed from samples/sockmap/sockmap_user.c)880
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.c (renamed from samples/sockmap/sockmap_kern.c)35
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh729
-rw-r--r--tools/testing/selftests/bpf/test_tunnel_kern.c (renamed from samples/bpf/tcbpf2_kern.c)263
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c266
-rw-r--r--tools/testing/selftests/net/.gitignore4
-rw-r--r--tools/testing/selftests/net/Makefile7
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh26
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh133
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh161
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh226
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh212
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh129
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh85
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh115
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh127
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh129
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh40
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh5
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh4
-rw-r--r--tools/testing/selftests/net/tcp_inq.c189
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c447
-rw-r--r--tools/testing/selftests/net/udpgso.c620
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh29
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh74
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c265
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c420
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json74
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json1036
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json588
647 files changed, 33114 insertions, 14101 deletions
diff --git a/Documentation/devicetree/bindings/net/meson-dwmac.txt b/Documentation/devicetree/bindings/net/meson-dwmac.txt
index 61cada22ae6c..1321bb194ed9 100644
--- a/Documentation/devicetree/bindings/net/meson-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/meson-dwmac.txt
@@ -11,6 +11,7 @@ Required properties on all platforms:
- "amlogic,meson8b-dwmac"
- "amlogic,meson8m2-dwmac"
- "amlogic,meson-gxbb-dwmac"
+ - "amlogic,meson-axg-dwmac"
Additionally "snps,dwmac" and any applicable more
detailed version number described in net/stmmac.txt
should be used.
diff --git a/Documentation/devicetree/bindings/net/microchip,lan78xx.txt b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt
new file mode 100644
index 000000000000..76786a0f6d3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt
@@ -0,0 +1,54 @@
+Microchip LAN78xx Gigabit Ethernet controller
+
+The LAN78XX devices are usually configured by programming their OTP or with
+an external EEPROM, but some platforms (e.g. Raspberry Pi 3 B+) have neither.
+The Device Tree properties, if present, override the OTP and EEPROM.
+
+Required properties:
+- compatible: Should be one of "usb424,7800", "usb424,7801" or "usb424,7850".
+
+Optional properties:
+- local-mac-address: see ethernet.txt
+- mac-address: see ethernet.txt
+
+Optional properties of the embedded PHY:
+- microchip,led-modes: a 0..4 element vector, with each element configuring
+ the operating mode of an LED. Omitted LEDs are turned off. Allowed values
+ are defined in "include/dt-bindings/net/microchip-lan78xx.h".
+
+Example:
+
+/* Based on the configuration for a Raspberry Pi 3 B+ */
+&usb {
+ usb-port@1 {
+ compatible = "usb424,2514";
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ usb-port@1 {
+ compatible = "usb424,2514";
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet: ethernet@1 {
+ compatible = "usb424,7800";
+ reg = <1>;
+ local-mac-address = [ 00 11 22 33 44 55 ];
+
+ mdio {
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+ eth_phy: ethernet-phy@1 {
+ reg = <1>;
+ microchip,led-modes = <
+ LAN78XX_LINK_1000_ACTIVITY
+ LAN78XX_LINK_10_100_ACTIVITY
+ >;
+ };
+ };
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
index 96398cc2982f..fc8f01718690 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -13,13 +13,25 @@ Required properties:
- reg: Address where registers are mapped and size of region.
- interrupts: Should contain the MAC interrupt.
- phy-mode: See ethernet.txt in the same directory. Allow to choose
- "rgmii", "rmii", or "mii" according to the PHY.
+ "rgmii", "rmii", "mii", or "internal" according to the PHY.
+ The acceptable mode is SoC-dependent.
- phy-handle: Should point to the external phy device.
See ethernet.txt file in the same directory.
- clocks: A phandle to the clock for the MAC.
+ For Pro4 SoC, that is "socionext,uniphier-pro4-ave4",
+ another MAC clock, GIO bus clock and PHY clock are also required.
+ - clock-names: Should contain
+ - "ether", "ether-gb", "gio", "ether-phy" for Pro4 SoC
+ - "ether" for others
+ - resets: A phandle to the reset control for the MAC. For Pro4 SoC,
+ GIO bus reset is also required.
+ - reset-names: Should contain
+ - "ether", "gio" for Pro4 SoC
+ - "ether" for others
+ - socionext,syscon-phy-mode: A phandle to syscon with one argument
+ that configures phy mode. The argument is the ID of MAC instance.
Optional properties:
- - resets: A phandle to the reset control for the MAC.
- local-mac-address: See ethernet.txt in the same directory.
Required subnode:
@@ -34,8 +46,11 @@ Example:
interrupts = <0 66 4>;
phy-mode = "rgmii";
phy-handle = <&ethphy>;
+ clock-names = "ether";
clocks = <&sys_clk 6>;
+ reset-names = "ether";
resets = <&sys_rst 6>;
+ socionext,syscon-phy-mode = <&soc_glue 0>;
local-mac-address = [00 00 00 00 00 00];
mdio {
diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
index 77cd42cc5f54..b025770eeb92 100644
--- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
+++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
@@ -17,7 +17,8 @@ pool management.
Required properties:
-- compatible : Must be "ti,keystone-navigator-qmss";
+- compatible : Must be "ti,keystone-navigator-qmss".
+ : Must be "ti,66ak2g-navss-qm" for QMSS on K2G SoC.
- clocks : phandle to the reference clock for this device.
- queue-range : <start number> total range of queue numbers for the device.
- linkram0 : <address size> for internal link ram, where size is the total
@@ -39,6 +40,12 @@ Required properties:
- Descriptor memory setup region.
- Queue Management/Queue Proxy region for queue Push.
- Queue Management/Queue Proxy region for queue Pop.
+
+For QMSS on K2G SoC, following QM reg indexes are used in that order
+ - Queue Peek region.
+ - Queue configuration region.
+ - Queue Management/Queue Proxy region for queue Push/Pop.
+
- queue-pools : child node classifying the queue ranges into pools.
Queue ranges are grouped into 3 type of pools:
- qpend : pool of qpend(interruptible) queues
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
index 5efae00f6c7f..d2963123eb1c 100644
--- a/Documentation/filesystems/nfs/nfsroot.txt
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -5,6 +5,7 @@ Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
Updated 2006 by Horms <horms@verge.net.au>
+Updated 2018 by Chris Novakovic <chris@chrisn.me.uk>
@@ -79,7 +80,7 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
- <dns0-ip>:<dns1-ip>
+ <dns0-ip>:<dns1-ip>:<ntp0-ip>
This parameter tells the kernel how to configure IP addresses of devices
and also how to set up the IP routing table. It was originally called
@@ -110,6 +111,9 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
will not be triggered if it is missing and NFS root is not
in operation.
+ Value is exported to /proc/net/pnp with the prefix "bootserver "
+ (see below).
+
Default: Determined using autoconfiguration.
The address of the autoconfiguration server is used.
@@ -123,10 +127,13 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
Default: Determined using autoconfiguration.
- <hostname> Name of the client. May be supplied by autoconfiguration,
- but its absence will not trigger autoconfiguration.
- If specified and DHCP is used, the user provided hostname will
- be carried in the DHCP request to hopefully update DNS record.
+ <hostname> Name of the client. If a '.' character is present, anything
+ before the first '.' is used as the client's hostname, and anything
+ after it is used as its NIS domain name. May be supplied by
+ autoconfiguration, but its absence will not trigger autoconfiguration.
+ If specified and DHCP is used, the user-provided hostname (and NIS
+ domain name, if present) will be carried in the DHCP request; this
+ may cause a DNS record to be created or updated for the client.
Default: Client IP address is used in ASCII notation.
@@ -162,12 +169,55 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
Default: any
- <dns0-ip> IP address of first nameserver.
- Value gets exported by /proc/net/pnp which is often linked
- on embedded systems by /etc/resolv.conf.
+ <dns0-ip> IP address of primary nameserver.
+ Value is exported to /proc/net/pnp with the prefix "nameserver "
+ (see below).
+
+ Default: None if not using autoconfiguration; determined
+ automatically if using autoconfiguration.
+
+ <dns1-ip> IP address of secondary nameserver.
+ See <dns0-ip>.
+
+ <ntp0-ip> IP address of a Network Time Protocol (NTP) server.
+ Value is exported to /proc/net/ipconfig/ntp_servers, but is
+ otherwise unused (see below).
+
+ Default: None if not using autoconfiguration; determined
+ automatically if using autoconfiguration.
+
+ After configuration (whether manual or automatic) is complete, two files
+ are created in the following format; lines are omitted if their respective
+ value is empty following configuration:
+
+ - /proc/net/pnp:
+
+ #PROTO: <DHCP|BOOTP|RARP|MANUAL> (depending on configuration method)
+ domain <dns-domain> (if autoconfigured, the DNS domain)
+ nameserver <dns0-ip> (primary name server IP)
+ nameserver <dns1-ip> (secondary name server IP)
+ nameserver <dns2-ip> (tertiary name server IP)
+ bootserver <server-ip> (NFS server IP)
+
+ - /proc/net/ipconfig/ntp_servers:
+
+ <ntp0-ip> (NTP server IP)
+ <ntp1-ip> (NTP server IP)
+ <ntp2-ip> (NTP server IP)
+
+ <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip>
+ (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration;
+ they cannot be specified as part of the "ip=" kernel command line parameter.
+
+ Because the "domain" and "nameserver" options are recognised by DNS
+ resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems
+ that use an NFS root filesystem.
- <dns1-ip> IP address of second nameserver.
- Same as above.
+ Note that the kernel will not synchronise the system time with any NTP
+ servers it discovers; this is the responsibility of a user space process
+ (e.g. an initrd/initramfs script that passes the IP addresses listed in
+ /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real
+ root filesystem if it is on NFS).
nfsrootdebug
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 35ffaa281b26..59afc9a10b4f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1428,6 +1428,19 @@ ip6frag_low_thresh - INTEGER
ip6frag_time - INTEGER
Time in seconds to keep an IPv6 fragment in memory.
+IPv6 Segment Routing:
+
+seg6_flowlabel - INTEGER
+ Controls the behaviour of computing the flowlabel of outer
+ IPv6 header in case of SR T.encaps
+
+ -1 set flowlabel to zero.
+ 0 copy flowlabel from Inner packet in case of Inner IPv6
+ (Set flowlabel to 0 in case IPv4/L2)
+ 1 Compute the flowlabel using seg6_make_flowlabel()
+
+ Default is 0.
+
conf/default/*:
Change the interface-specific default settings.
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
index c77f9d57eb91..c4a54c162547 100644
--- a/Documentation/networking/netdev-features.txt
+++ b/Documentation/networking/netdev-features.txt
@@ -113,6 +113,13 @@ whatever headers there might be.
NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
+ * Transmit UDP segmentation offload
+
+NETIF_F_GSO_UDP_GSO_L4 accepts a single UDP header with a payload that exceeds
+gso_size. On segmentation, it segments the payload on gso_size boundaries and
+replicates the network and UDP headers (fixing up the last one if less than
+gso_size).
+
* Transmit DMA from high memory
On platforms where this is relevant, NETIF_F_HIGHDMA signals that
diff --git a/MAINTAINERS b/MAINTAINERS
index b1ccabd0dbc3..ebe0b9ed7805 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5328,7 +5328,6 @@ F: include/linux/*mdio*.h
F: include/linux/of_net.h
F: include/linux/phy.h
F: include/linux/phy_fixed.h
-F: include/linux/platform_data/mdio-gpio.h
F: include/linux/platform_data/mdio-bcm-unimac.h
F: include/trace/events/mdio.h
F: include/uapi/linux/mdio.h
@@ -9030,26 +9029,17 @@ W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlx5/core/en_*
-MELLANOX ETHERNET INNOVA DRIVER
-M: Ilan Tayari <ilant@mellanox.com>
-R: Boris Pismenny <borisp@mellanox.com>
+MELLANOX ETHERNET INNOVA DRIVERS
+M: Boris Pismenny <borisp@mellanox.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
+F: drivers/net/ethernet/mellanox/mlx5/core/en_accel/*
+F: drivers/net/ethernet/mellanox/mlx5/core/accel/*
F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
F: include/linux/mlx5/mlx5_ifc_fpga.h
-MELLANOX ETHERNET INNOVA IPSEC DRIVER
-M: Ilan Tayari <ilant@mellanox.com>
-R: Boris Pismenny <borisp@mellanox.com>
-L: netdev@vger.kernel.org
-S: Supported
-W: http://www.mellanox.com
-Q: http://patchwork.ozlabs.org/project/netdev/list/
-F: drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
-F: drivers/net/ethernet/mellanox/mlx5/core/ipsec*
-
MELLANOX ETHERNET SWITCH DRIVERS
M: Jiri Pirko <jiri@mellanox.com>
M: Ido Schimmel <idosch@mellanox.com>
@@ -9842,7 +9832,7 @@ F: net/netfilter/xt_CONNSECMARK.c
F: net/netfilter/xt_SECMARK.c
NETWORKING [TLS]
-M: Ilya Lesokhin <ilyal@mellanox.com>
+M: Boris Pismenny <borisp@mellanox.com>
M: Aviad Yehezkel <aviadye@mellanox.com>
M: Dave Watson <davejwatson@fb.com>
L: netdev@vger.kernel.org
@@ -14626,7 +14616,9 @@ M: Woojung Huh <woojung.huh@microchip.com>
M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
+F: Documentation/devicetree/bindings/net/microchip,lan78xx.txt
F: drivers/net/usb/lan78xx.*
+F: include/dt-bindings/net/microchip-lan78xx.h
USB MASS STORAGE DRIVER
M: Alan Stern <stern@rowland.harvard.edu>
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index a782ce87715c..ed5e42461094 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -262,6 +262,8 @@ void proc_coredump_connector(struct task_struct *task)
ev->what = PROC_EVENT_COREDUMP;
ev->event_data.coredump.process_pid = task->pid;
ev->event_data.coredump.process_tgid = task->tgid;
+ ev->event_data.coredump.parent_pid = task->real_parent->pid;
+ ev->event_data.coredump.parent_tgid = task->real_parent->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
@@ -288,6 +290,8 @@ void proc_exit_connector(struct task_struct *task)
ev->event_data.exit.process_tgid = task->tgid;
ev->event_data.exit.exit_code = task->exit_code;
ev->event_data.exit.exit_signal = task->exit_signal;
+ ev->event_data.exit.parent_pid = task->real_parent->pid;
+ ev->event_data.exit.parent_tgid = task->real_parent->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 7afbb28d6a0f..1bc5ffb338c8 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -270,7 +270,7 @@ EXPORT_SYMBOL_GPL(dca_remove_requester);
* @dev - the device that wants dca service
* @cpu - the cpuid as returned by get_cpu()
*/
-u8 dca_common_get_tag(struct device *dev, int cpu)
+static u8 dca_common_get_tag(struct device *dev, int cpu)
{
struct dca_provider *dca;
u8 tag;
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index 4be3aef40bd2..267da8215e08 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -443,17 +443,16 @@ static u8 opa_vnic_get_rc(struct __opa_veswport_info *info,
}
/* opa_vnic_calc_entropy - calculate the packet entropy */
-u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+u8 opa_vnic_calc_entropy(struct sk_buff *skb)
{
- u16 hash16;
-
- /*
- * Get flow based 16-bit hash and then XOR the upper and lower bytes
- * to get the entropy.
- * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
- */
- hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
- return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+ u32 hash = skb_get_hash(skb);
+
+ /* store XOR of all bytes in lower 8 bits */
+ hash ^= hash >> 8;
+ hash ^= hash >> 16;
+
+ /* return lower 8 bits as entropy */
+ return (u8)(hash & 0xFF);
}
/* opa_vnic_get_def_port - get default port based on entropy */
@@ -490,7 +489,7 @@ void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
- entropy = opa_vnic_calc_entropy(adapter, skb);
+ entropy = opa_vnic_calc_entropy(skb);
def_port = opa_vnic_get_def_port(adapter, entropy);
len = opa_vnic_wire_length(skb);
dlid = opa_vnic_get_dlid(adapter, skb, def_port);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index afd95f432262..43ac61ffef4a 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -299,7 +299,7 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
-u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct sk_buff *skb);
void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index ce57e0f10289..0c8aec62a425 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -104,7 +104,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
/* pass entropy and vl as metadata in skb */
mdata = skb_push(skb, sizeof(*mdata));
- mdata->entropy = opa_vnic_calc_entropy(adapter, skb);
+ mdata->entropy = opa_vnic_calc_entropy(skb);
mdata->vl = opa_vnic_get_vl(adapter, skb);
rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
accel_priv, fallback);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 891846655000..a029b27fd002 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -198,6 +198,7 @@ config VXLAN
config GENEVE
tristate "Generic Network Virtualization Encapsulation"
depends on INET && NET_UDP_TUNNEL
+ depends on IPV6 || !IPV6
select NET_IP_TUNNEL
select GRO_CELLS
---help---
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 78616787f2a3..9f561fe505cb 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -806,16 +806,39 @@ static unsigned int b53_get_mib_size(struct b53_device *dev)
return B53_MIBS_SIZE;
}
-void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+static struct phy_device *b53_get_phy_device(struct dsa_switch *ds, int port)
+{
+ /* These ports typically do not have built-in PHYs */
+ switch (port) {
+ case B53_CPU_PORT_25:
+ case 7:
+ case B53_CPU_PORT:
+ return NULL;
+ }
+
+ return mdiobus_get_phy(ds->slave_mii_bus, port);
+}
+
+void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+ uint8_t *data)
{
struct b53_device *dev = ds->priv;
const struct b53_mib_desc *mibs = b53_get_mib(dev);
unsigned int mib_size = b53_get_mib_size(dev);
+ struct phy_device *phydev;
unsigned int i;
- for (i = 0; i < mib_size; i++)
- strlcpy(data + i * ETH_GSTRING_LEN,
- mibs[i].name, ETH_GSTRING_LEN);
+ if (stringset == ETH_SS_STATS) {
+ for (i = 0; i < mib_size; i++)
+ strlcpy(data + i * ETH_GSTRING_LEN,
+ mibs[i].name, ETH_GSTRING_LEN);
+ } else if (stringset == ETH_SS_PHY_STATS) {
+ phydev = b53_get_phy_device(ds, port);
+ if (!phydev)
+ return;
+
+ phy_ethtool_get_strings(phydev, data);
+ }
}
EXPORT_SYMBOL(b53_get_strings);
@@ -852,11 +875,34 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
}
EXPORT_SYMBOL(b53_get_ethtool_stats);
-int b53_get_sset_count(struct dsa_switch *ds, int port)
+void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data)
+{
+ struct phy_device *phydev;
+
+ phydev = b53_get_phy_device(ds, port);
+ if (!phydev)
+ return;
+
+ phy_ethtool_get_stats(phydev, NULL, data);
+}
+EXPORT_SYMBOL(b53_get_ethtool_phy_stats);
+
+int b53_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
struct b53_device *dev = ds->priv;
+ struct phy_device *phydev;
+
+ if (sset == ETH_SS_STATS) {
+ return b53_get_mib_size(dev);
+ } else if (sset == ETH_SS_PHY_STATS) {
+ phydev = b53_get_phy_device(ds, port);
+ if (!phydev)
+ return 0;
+
+ return phy_ethtool_get_sset_count(phydev);
+ }
- return b53_get_mib_size(dev);
+ return 0;
}
EXPORT_SYMBOL(b53_get_sset_count);
@@ -1477,7 +1523,7 @@ void b53_br_fast_age(struct dsa_switch *ds, int port)
}
EXPORT_SYMBOL(b53_br_fast_age);
-static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
+static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
{
/* Broadcom switches will accept enabling Broadcom tags on the
* following ports: 5, 7 and 8, any other port is not supported
@@ -1489,10 +1535,19 @@ static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
return true;
}
- dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n", port);
return false;
}
+static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
+{
+ bool ret = b53_possible_cpu_port(ds, port);
+
+ if (!ret)
+ dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n",
+ port);
+ return ret;
+}
+
enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
@@ -1650,6 +1705,7 @@ static const struct dsa_switch_ops b53_switch_ops = {
.get_strings = b53_get_strings,
.get_ethtool_stats = b53_get_ethtool_stats,
.get_sset_count = b53_get_sset_count,
+ .get_ethtool_phy_stats = b53_get_ethtool_phy_stats,
.phy_read = b53_phy_read16,
.phy_write = b53_phy_write16,
.adjust_link = b53_adjust_link,
@@ -1954,6 +2010,15 @@ static int b53_switch_init(struct b53_device *dev)
dev->num_ports = dev->cpu_port + 1;
dev->enabled_ports |= BIT(dev->cpu_port);
+ /* Include non standard CPU port built-in PHYs to be probed */
+ if (is539x(dev) || is531x5(dev)) {
+ for (i = 0; i < dev->num_ports; i++) {
+ if (!(dev->ds->phys_mii_mask & BIT(i)) &&
+ !b53_possible_cpu_port(dev->ds, i))
+ dev->ds->phys_mii_mask |= BIT(i);
+ }
+ }
+
dev->ports = devm_kzalloc(dev->dev,
sizeof(struct b53_port) * dev->num_ports,
GFP_KERNEL);
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 1187ebd79287..cc284a514de9 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -286,9 +286,11 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev)
/* Exported functions towards other drivers */
void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
int b53_configure_vlan(struct dsa_switch *ds);
-void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
+void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+ uint8_t *data);
void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds, int port);
+int b53_get_sset_count(struct dsa_switch *ds, int port, int sset);
+void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data);
int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 0378eded31f2..97236cfcbae4 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -859,6 +859,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.get_strings = b53_get_strings,
.get_ethtool_stats = b53_get_ethtool_stats,
.get_sset_count = b53_get_sset_count,
+ .get_ethtool_phy_stats = b53_get_ethtool_phy_stats,
.get_phy_flags = bcm_sf2_sw_get_phy_flags,
.adjust_link = bcm_sf2_sw_adjust_link,
.fixed_link_update = bcm_sf2_sw_fixed_link_update,
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index f77be9f85cb3..58f14af04639 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -86,16 +86,23 @@ static int dsa_loop_setup(struct dsa_switch *ds)
return 0;
}
-static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
+ if (sset != ETH_SS_STATS && sset != ETH_SS_PHY_STATS)
+ return 0;
+
return __DSA_LOOP_CNT_MAX;
}
-static void dsa_loop_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+static void dsa_loop_get_strings(struct dsa_switch *ds, int port,
+ u32 stringset, uint8_t *data)
{
struct dsa_loop_priv *ps = ds->priv;
unsigned int i;
+ if (stringset != ETH_SS_STATS && stringset != ETH_SS_PHY_STATS)
+ return;
+
for (i = 0; i < __DSA_LOOP_CNT_MAX; i++)
memcpy(data + i * ETH_GSTRING_LEN,
ps->ports[port].mib[i].name, ETH_GSTRING_LEN);
@@ -256,6 +263,7 @@ static const struct dsa_switch_ops dsa_loop_driver = {
.get_strings = dsa_loop_get_strings,
.get_ethtool_stats = dsa_loop_get_ethtool_stats,
.get_sset_count = dsa_loop_get_sset_count,
+ .get_ethtool_phy_stats = dsa_loop_get_ethtool_stats,
.phy_read = dsa_loop_phy_read,
.phy_write = dsa_loop_phy_write,
.port_bridge_join = dsa_loop_port_bridge_join,
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index fefa454f3e56..b4f6e1a67dd9 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -977,10 +977,14 @@ static const struct lan9303_mib_desc lan9303_mib[] = {
{ .offset = LAN9303_MAC_TX_LATECOL_0, .name = "TxLateCol", },
};
-static void lan9303_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+static void lan9303_get_strings(struct dsa_switch *ds, int port,
+ u32 stringset, uint8_t *data)
{
unsigned int u;
+ if (stringset != ETH_SS_STATS)
+ return;
+
for (u = 0; u < ARRAY_SIZE(lan9303_mib); u++) {
strncpy(data + u * ETH_GSTRING_LEN, lan9303_mib[u].name,
ETH_GSTRING_LEN);
@@ -1007,8 +1011,11 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
}
}
-static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
+ if (sset != ETH_SS_STATS)
+ return 0;
+
return ARRAY_SIZE(lan9303_mib);
}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index bcb3e6c734f2..7210c49b7922 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -439,15 +439,22 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
}
-static int ksz_sset_count(struct dsa_switch *ds, int port)
+static int ksz_sset_count(struct dsa_switch *ds, int port, int sset)
{
+ if (sset != ETH_SS_STATS)
+ return 0;
+
return TOTAL_SWITCH_COUNTER_NUM;
}
-static void ksz_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
+static void ksz_get_strings(struct dsa_switch *ds, int port,
+ u32 stringset, uint8_t *buf)
{
int i;
+ if (stringset != ETH_SS_STATS)
+ return;
+
for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string,
ETH_GSTRING_LEN);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 80a4dbc3a499..62e486652e62 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -573,10 +573,14 @@ static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum,
}
static void
-mt7530_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+ uint8_t *data)
{
int i;
+ if (stringset != ETH_SS_STATS)
+ return;
+
for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++)
strncpy(data + i * ETH_GSTRING_LEN, mt7530_mib[i].name,
ETH_GSTRING_LEN);
@@ -604,8 +608,11 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
}
static int
-mt7530_get_sset_count(struct dsa_switch *ds, int port)
+mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
+ if (sset != ETH_SS_STATS)
+ return 0;
+
return ARRAY_SIZE(mt7530_mib);
}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 3d2091099f7f..9d62e4acc01b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -665,13 +665,13 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
case STATS_TYPE_PORT:
err = mv88e6xxx_port_read(chip, port, s->reg, &reg);
if (err)
- return UINT64_MAX;
+ return U64_MAX;
low = reg;
if (s->size == 4) {
err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
if (err)
- return UINT64_MAX;
+ return U64_MAX;
high = reg;
}
break;
@@ -685,7 +685,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
break;
default:
- return UINT64_MAX;
+ return U64_MAX;
}
value = (((u64)high) << 16) | low;
return value;
@@ -742,11 +742,14 @@ static void mv88e6xxx_atu_vtu_get_strings(uint8_t *data)
}
static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
- uint8_t *data)
+ u32 stringset, uint8_t *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
int count = 0;
+ if (stringset != ETH_SS_STATS)
+ return;
+
mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_strings)
@@ -789,12 +792,15 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
STATS_TYPE_BANK1);
}
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
struct mv88e6xxx_chip *chip = ds->priv;
int serdes_count = 0;
int count = 0;
+ if (sset != ETH_SS_STATS)
+ return 0;
+
mutex_lock(&chip->reg_lock);
if (chip->info->ops->stats_get_sset_count)
count = chip->info->ops->stats_get_sset_count(chip);
@@ -1020,6 +1026,38 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
dev_err(ds->dev, "p%d: failed to update state\n", port);
}
+static int mv88e6xxx_devmap_setup(struct mv88e6xxx_chip *chip)
+{
+ int target, port;
+ int err;
+
+ if (!chip->info->global2_addr)
+ return 0;
+
+ /* Initialize the routing port to the 32 possible target devices */
+ for (target = 0; target < 32; target++) {
+ port = 0x1f;
+ if (target < DSA_MAX_SWITCHES)
+ if (chip->ds->rtable[target] != DSA_RTABLE_NONE)
+ port = chip->ds->rtable[target];
+
+ err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mv88e6xxx_trunk_setup(struct mv88e6xxx_chip *chip)
+{
+ /* Clear all trunk masks and mapping */
+ if (chip->info->global2_addr)
+ return mv88e6xxx_g2_trunk_clear(chip);
+
+ return 0;
+}
+
static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip)
{
if (chip->info->ops->pot_clear)
@@ -2190,13 +2228,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
if (err)
goto unlock;
- /* Setup Switch Global 2 Registers */
- if (chip->info->global2_addr) {
- err = mv88e6xxx_g2_setup(chip);
- if (err)
- goto unlock;
- }
-
err = mv88e6xxx_irl_setup(chip);
if (err)
goto unlock;
@@ -2233,6 +2264,14 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
if (err)
goto unlock;
+ err = mv88e6xxx_trunk_setup(chip);
+ if (err)
+ goto unlock;
+
+ err = mv88e6xxx_devmap_setup(chip);
+ if (err)
+ goto unlock;
+
/* Setup PTP Hardware Clock and timestamping */
if (chip->info->ptp_support) {
err = mv88e6xxx_ptp_setup(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 80490f66bc06..4163c8099d0b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -21,10 +21,6 @@
#include <linux/timecounter.h>
#include <net/dsa.h>
-#ifndef UINT64_MAX
-#define UINT64_MAX (u64)(~((u64)0))
-#endif
-
#define SMI_CMD 0x00
#define SMI_CMD_BUSY BIT(15)
#define SMI_CMD_CLAUSE_22 BIT(12)
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 0ce627fded48..e6d658181b27 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -119,37 +119,17 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
/* Offset 0x06: Device Mapping Table register */
-static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
- int target, int port)
+int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
+ int port)
{
- u16 val = (target << 8) | (port & 0xf);
+ u16 val = (target << 8) | (port & 0x1f);
+ /* Modern chips use 5 bits to define a device mapping port,
+ * but bit 4 is reserved on older chips, so it is safe to use.
+ */
return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_DEVICE_MAPPING, val);
}
-static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
-{
- int target, port;
- int err;
-
- /* Initialize the routing port to the 32 possible target devices */
- for (target = 0; target < 32; ++target) {
- port = 0xf;
-
- if (target < DSA_MAX_SWITCHES) {
- port = chip->ds->rtable[target];
- if (port == DSA_RTABLE_NONE)
- port = 0xf;
- }
-
- err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
- if (err)
- break;
- }
-
- return err;
-}
-
/* Offset 0x07: Trunk Mask Table register */
static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
@@ -174,7 +154,7 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_TRUNK_MAPPING, val);
}
-static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
+int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip)
{
const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1;
int i, err;
@@ -1138,31 +1118,3 @@ void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
for (phy = 0; phy < chip->info->num_internal_phys; phy++)
irq_dispose_mapping(bus->irq[phy]);
}
-
-int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
- u16 reg;
- int err;
-
- /* Ignore removed tag data on doubly tagged packets, disable
- * flow control messages, force flow control priority to the
- * highest, and send all special multicast frames to the CPU
- * port at the highest priority.
- */
- reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4);
- err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg);
- if (err)
- return err;
-
- /* Program the DSA routing table. */
- err = mv88e6xxx_g2_set_device_mapping(chip);
- if (err)
- return err;
-
- /* Clear all trunk masks and mapping. */
- err = mv88e6xxx_g2_clear_trunk(chip);
- if (err)
- return err;
-
- return 0;
-}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 520ec70d32e8..37e8ce2c72a0 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -60,7 +60,8 @@
#define MV88E6XXX_G2_DEVICE_MAPPING 0x06
#define MV88E6XXX_G2_DEVICE_MAPPING_UPDATE 0x8000
#define MV88E6XXX_G2_DEVICE_MAPPING_DEV_MASK 0x1f00
-#define MV88E6XXX_G2_DEVICE_MAPPING_PORT_MASK 0x000f
+#define MV88E6352_G2_DEVICE_MAPPING_PORT_MASK 0x000f
+#define MV88E6390_G2_DEVICE_MAPPING_PORT_MASK 0x001f
/* Offset 0x07: Trunk Mask Table Register */
#define MV88E6XXX_G2_TRUNK_MASK 0x07
@@ -313,7 +314,6 @@ int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
int src_port, u16 data);
int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
-int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
@@ -327,6 +327,11 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
+int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
+ int port);
+
extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
@@ -441,11 +446,6 @@ static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
return -EOPNOTSUPP;
}
-static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
- return -EOPNOTSUPP;
-}
-
static inline int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
{
return -EOPNOTSUPP;
@@ -495,6 +495,17 @@ static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
return -EOPNOTSUPP;
}
+static inline int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
+ int target, int port)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
#endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 600d5ad1fbde..757b6d90ea36 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -600,10 +600,13 @@ qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val)
}
static void
-qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data)
{
int i;
+ if (stringset != ETH_SS_STATS)
+ return;
+
for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++)
strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name,
ETH_GSTRING_LEN);
@@ -631,8 +634,11 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
}
static int
-qca8k_get_sset_count(struct dsa_switch *ds, int port)
+qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
+ if (sset != ETH_SS_STATS)
+ return 0;
+
return ARRAY_SIZE(ar8327_mib);
}
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index 9fee7c83ef9f..f2f0264c58ba 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -29,8 +29,8 @@ config PCMCIA_AXNET
called axnet_cs. If unsure, say N.
config AX88796
- tristate "ASIX AX88796 NE2000 clone support"
- depends on (ARM || MIPS || SUPERH)
+ tristate "ASIX AX88796 NE2000 clone support" if !ZORRO
+ depends on (ARM || MIPS || SUPERH || ZORRO || COMPILE_TEST)
select CRC32
select PHYLIB
select MDIO_BITBANG
@@ -45,6 +45,19 @@ config AX88796_93CX6
---help---
Select this if your platform comes with an external 93CX6 eeprom.
+config XSURF100
+ tristate "Amiga XSurf 100 AX88796/NE2000 clone support"
+ depends on ZORRO
+ select AX88796
+ select ASIX_PHY
+ help
+ This driver is for the Individual Computers X-Surf 100 Ethernet
+ card (based on the Asix AX88796 chip). If you have such a card,
+ say Y. Otherwise, say N.
+
+ To compile this driver as a module, choose M here: the module
+ will be called xsurf100.
+
config HYDRA
tristate "Hydra support"
depends on ZORRO
diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index 1d650e66cc6e..85c83c566ec6 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -16,4 +16,5 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
obj-$(CONFIG_STNIC) += stnic.o 8390.o
obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
obj-$(CONFIG_WD80x3) += wd.o 8390.o
+obj-$(CONFIG_XSURF100) += xsurf100.o
obj-$(CONFIG_ZORRO8390) += zorro8390.o
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index da61cf3cb3a9..2a0ddec1dd56 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -163,6 +163,21 @@ static void ax_reset_8390(struct net_device *dev)
ei_outb(ENISR_RESET, addr + EN0_ISR); /* Ack intr. */
}
+/* Wrapper for __ei_interrupt for platforms that have a platform-specific
+ * way to find out whether the interrupt request might be caused by
+ * the ax88796 chip.
+ */
+static irqreturn_t ax_ei_interrupt_filtered(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct ax_device *ax = to_ax_dev(dev);
+ struct platform_device *pdev = to_platform_device(dev->dev.parent);
+
+ if (!ax->plat->check_irq(pdev))
+ return IRQ_NONE;
+
+ return ax_ei_interrupt(irq, dev_id);
+}
static void ax_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr,
int ring_page)
@@ -387,6 +402,90 @@ static void ax_phy_switch(struct net_device *dev, int on)
ei_outb(reg_gpoc, ei_local->mem + EI_SHIFT(0x17));
}
+static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level)
+{
+ struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+
+ if (level)
+ ax->reg_memr |= AX_MEMR_MDC;
+ else
+ ax->reg_memr &= ~AX_MEMR_MDC;
+
+ ei_outb(ax->reg_memr, ax->addr_memr);
+}
+
+static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output)
+{
+ struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+
+ if (output)
+ ax->reg_memr &= ~AX_MEMR_MDIR;
+ else
+ ax->reg_memr |= AX_MEMR_MDIR;
+
+ ei_outb(ax->reg_memr, ax->addr_memr);
+}
+
+static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value)
+{
+ struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+
+ if (value)
+ ax->reg_memr |= AX_MEMR_MDO;
+ else
+ ax->reg_memr &= ~AX_MEMR_MDO;
+
+ ei_outb(ax->reg_memr, ax->addr_memr);
+}
+
+static int ax_bb_get_data(struct mdiobb_ctrl *ctrl)
+{
+ struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
+ int reg_memr = ei_inb(ax->addr_memr);
+
+ return reg_memr & AX_MEMR_MDI ? 1 : 0;
+}
+
+static const struct mdiobb_ops bb_ops = {
+ .owner = THIS_MODULE,
+ .set_mdc = ax_bb_mdc,
+ .set_mdio_dir = ax_bb_dir,
+ .set_mdio_data = ax_bb_set_data,
+ .get_mdio_data = ax_bb_get_data,
+};
+
+static int ax_mii_init(struct net_device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev->dev.parent);
+ struct ei_device *ei_local = netdev_priv(dev);
+ struct ax_device *ax = to_ax_dev(dev);
+ int err;
+
+ ax->bb_ctrl.ops = &bb_ops;
+ ax->addr_memr = ei_local->mem + AX_MEMR;
+ ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl);
+ if (!ax->mii_bus) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ax->mii_bus->name = "ax88796_mii_bus";
+ ax->mii_bus->parent = dev->dev.parent;
+ snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+ pdev->name, pdev->id);
+
+ err = mdiobus_register(ax->mii_bus);
+ if (err)
+ goto out_free_mdio_bitbang;
+
+ return 0;
+
+ out_free_mdio_bitbang:
+ free_mdio_bitbang(ax->mii_bus);
+ out:
+ return err;
+}
+
static int ax_open(struct net_device *dev)
{
struct ax_device *ax = to_ax_dev(dev);
@@ -394,8 +493,16 @@ static int ax_open(struct net_device *dev)
netdev_dbg(dev, "open\n");
- ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
- dev->name, dev);
+ ret = ax_mii_init(dev);
+ if (ret)
+ goto failed_mii;
+
+ if (ax->plat->check_irq)
+ ret = request_irq(dev->irq, ax_ei_interrupt_filtered,
+ ax->irqflags, dev->name, dev);
+ else
+ ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
+ dev->name, dev);
if (ret)
goto failed_request_irq;
@@ -421,6 +528,10 @@ static int ax_open(struct net_device *dev)
ax_phy_switch(dev, 0);
free_irq(dev->irq, dev);
failed_request_irq:
+ /* unregister mdiobus */
+ mdiobus_unregister(ax->mii_bus);
+ free_mdio_bitbang(ax->mii_bus);
+ failed_mii:
return ret;
}
@@ -440,6 +551,9 @@ static int ax_close(struct net_device *dev)
phy_disconnect(dev->phydev);
free_irq(dev->irq, dev);
+
+ mdiobus_unregister(ax->mii_bus);
+ free_mdio_bitbang(ax->mii_bus);
return 0;
}
@@ -539,92 +653,8 @@ static const struct net_device_ops ax_netdev_ops = {
#endif
};
-static void ax_bb_mdc(struct mdiobb_ctrl *ctrl, int level)
-{
- struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-
- if (level)
- ax->reg_memr |= AX_MEMR_MDC;
- else
- ax->reg_memr &= ~AX_MEMR_MDC;
-
- ei_outb(ax->reg_memr, ax->addr_memr);
-}
-
-static void ax_bb_dir(struct mdiobb_ctrl *ctrl, int output)
-{
- struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-
- if (output)
- ax->reg_memr &= ~AX_MEMR_MDIR;
- else
- ax->reg_memr |= AX_MEMR_MDIR;
-
- ei_outb(ax->reg_memr, ax->addr_memr);
-}
-
-static void ax_bb_set_data(struct mdiobb_ctrl *ctrl, int value)
-{
- struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
-
- if (value)
- ax->reg_memr |= AX_MEMR_MDO;
- else
- ax->reg_memr &= ~AX_MEMR_MDO;
-
- ei_outb(ax->reg_memr, ax->addr_memr);
-}
-
-static int ax_bb_get_data(struct mdiobb_ctrl *ctrl)
-{
- struct ax_device *ax = container_of(ctrl, struct ax_device, bb_ctrl);
- int reg_memr = ei_inb(ax->addr_memr);
-
- return reg_memr & AX_MEMR_MDI ? 1 : 0;
-}
-
-static const struct mdiobb_ops bb_ops = {
- .owner = THIS_MODULE,
- .set_mdc = ax_bb_mdc,
- .set_mdio_dir = ax_bb_dir,
- .set_mdio_data = ax_bb_set_data,
- .get_mdio_data = ax_bb_get_data,
-};
-
/* setup code */
-static int ax_mii_init(struct net_device *dev)
-{
- struct platform_device *pdev = to_platform_device(dev->dev.parent);
- struct ei_device *ei_local = netdev_priv(dev);
- struct ax_device *ax = to_ax_dev(dev);
- int err;
-
- ax->bb_ctrl.ops = &bb_ops;
- ax->addr_memr = ei_local->mem + AX_MEMR;
- ax->mii_bus = alloc_mdio_bitbang(&ax->bb_ctrl);
- if (!ax->mii_bus) {
- err = -ENOMEM;
- goto out;
- }
-
- ax->mii_bus->name = "ax88796_mii_bus";
- ax->mii_bus->parent = dev->dev.parent;
- snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
- pdev->name, pdev->id);
-
- err = mdiobus_register(ax->mii_bus);
- if (err)
- goto out_free_mdio_bitbang;
-
- return 0;
-
- out_free_mdio_bitbang:
- free_mdio_bitbang(ax->mii_bus);
- out:
- return err;
-}
-
static void ax_initial_setup(struct net_device *dev, struct ei_device *ei_local)
{
void __iomem *ioaddr = ei_local->mem;
@@ -669,10 +699,16 @@ static int ax_init_dev(struct net_device *dev)
if (ax->plat->flags & AXFLG_HAS_EEPROM) {
unsigned char SA_prom[32];
+ ei_outb(6, ioaddr + EN0_RCNTLO);
+ ei_outb(0, ioaddr + EN0_RCNTHI);
+ ei_outb(0, ioaddr + EN0_RSARLO);
+ ei_outb(0, ioaddr + EN0_RSARHI);
+ ei_outb(E8390_RREAD + E8390_START, ioaddr + NE_CMD);
for (i = 0; i < sizeof(SA_prom); i += 2) {
SA_prom[i] = ei_inb(ioaddr + NE_DATAPORT);
SA_prom[i + 1] = ei_inb(ioaddr + NE_DATAPORT);
}
+ ei_outb(ENISR_RDC, ioaddr + EN0_ISR); /* Ack intr. */
if (ax->plat->wordlength == 2)
for (i = 0; i < 16; i++)
@@ -741,18 +777,20 @@ static int ax_init_dev(struct net_device *dev)
#endif
ei_local->reset_8390 = &ax_reset_8390;
- ei_local->block_input = &ax_block_input;
- ei_local->block_output = &ax_block_output;
+ if (ax->plat->block_input)
+ ei_local->block_input = ax->plat->block_input;
+ else
+ ei_local->block_input = &ax_block_input;
+ if (ax->plat->block_output)
+ ei_local->block_output = ax->plat->block_output;
+ else
+ ei_local->block_output = &ax_block_output;
ei_local->get_8390_hdr = &ax_get_8390_hdr;
ei_local->priv = 0;
dev->netdev_ops = &ax_netdev_ops;
dev->ethtool_ops = &ax_ethtool_ops;
- ret = ax_mii_init(dev);
- if (ret)
- goto err_out;
-
ax_NS8390_init(dev, 0);
ret = register_netdev(dev);
@@ -777,7 +815,6 @@ static int ax_remove(struct platform_device *pdev)
struct resource *mem;
unregister_netdev(dev);
- free_irq(dev->irq, dev);
iounmap(ei_local->mem);
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -789,6 +826,7 @@ static int ax_remove(struct platform_device *pdev)
release_mem_region(mem->start, resource_size(mem));
}
+ platform_set_drvdata(pdev, NULL);
free_netdev(dev);
return 0;
@@ -835,6 +873,9 @@ static int ax_probe(struct platform_device *pdev)
dev->irq = irq->start;
ax->irqflags = irq->flags & IRQF_TRIGGER_MASK;
+ if (irq->flags & IORESOURCE_IRQ_SHAREABLE)
+ ax->irqflags |= IRQF_SHARED;
+
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!mem) {
dev_err(&pdev->dev, "no MEM specified\n");
@@ -919,6 +960,7 @@ static int ax_probe(struct platform_device *pdev)
release_mem_region(mem->start, mem_size);
exit_mem:
+ platform_set_drvdata(pdev, NULL);
free_netdev(dev);
return ret;
diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c
new file mode 100644
index 000000000000..e2c963821ffe
--- /dev/null
+++ b/drivers/net/ethernet/8390/xsurf100.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/zorro.h>
+#include <net/ax88796.h>
+#include <asm/amigaints.h>
+
+#define ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100 \
+ ZORRO_ID(INDIVIDUAL_COMPUTERS, 0x64, 0)
+
+#define XS100_IRQSTATUS_BASE 0x40
+#define XS100_8390_BASE 0x800
+
+/* Longword-access area. Translated to 2 16-bit access cycles by the
+ * X-Surf 100 FPGA
+ */
+#define XS100_8390_DATA32_BASE 0x8000
+#define XS100_8390_DATA32_SIZE 0x2000
+/* Sub-Areas for fast data register access; addresses relative to area begin */
+#define XS100_8390_DATA_READ32_BASE 0x0880
+#define XS100_8390_DATA_WRITE32_BASE 0x0C80
+#define XS100_8390_DATA_AREA_SIZE 0x80
+
+#define __NS8390_init ax_NS8390_init
+
+/* force unsigned long back to 'void __iomem *' */
+#define ax_convert_addr(_a) ((void __force __iomem *)(_a))
+
+#define ei_inb(_a) z_readb(ax_convert_addr(_a))
+#define ei_outb(_v, _a) z_writeb(_v, ax_convert_addr(_a))
+
+#define ei_inw(_a) z_readw(ax_convert_addr(_a))
+#define ei_outw(_v, _a) z_writew(_v, ax_convert_addr(_a))
+
+#define ei_inb_p(_a) ei_inb(_a)
+#define ei_outb_p(_v, _a) ei_outb(_v, _a)
+
+/* define EI_SHIFT() to take into account our register offsets */
+#define EI_SHIFT(x) (ei_local->reg_offset[(x)])
+
+/* Ensure we have our RCR base value */
+#define AX88796_PLATFORM
+
+static unsigned char version[] =
+ "ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
+
+#include "lib8390.c"
+
+/* from ne.c */
+#define NE_CMD EI_SHIFT(0x00)
+#define NE_RESET EI_SHIFT(0x1f)
+#define NE_DATAPORT EI_SHIFT(0x10)
+
+struct xsurf100_ax_plat_data {
+ struct ax_plat_data ax;
+ void __iomem *base_regs;
+ void __iomem *data_area;
+};
+
+static int is_xsurf100_network_irq(struct platform_device *pdev)
+{
+ struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+ return (readw(xs100->base_regs + XS100_IRQSTATUS_BASE) & 0xaaaa) != 0;
+}
+
+/* These functions guarantee that the iomem is accessed with 32 bit
+ * cycles only. z_memcpy_fromio / z_memcpy_toio don't
+ */
+static void z_memcpy_fromio32(void *dst, const void __iomem *src, size_t bytes)
+{
+ while (bytes > 32) {
+ asm __volatile__
+ ("movem.l (%0)+,%%d0-%%d7\n"
+ "movem.l %%d0-%%d7,(%1)\n"
+ "adda.l #32,%1" : "=a"(src), "=a"(dst)
+ : "0"(src), "1"(dst) : "d0", "d1", "d2", "d3", "d4",
+ "d5", "d6", "d7", "memory");
+ bytes -= 32;
+ }
+ while (bytes) {
+ *(uint32_t *)dst = z_readl(src);
+ src += 4;
+ dst += 4;
+ bytes -= 4;
+ }
+}
+
+static void z_memcpy_toio32(void __iomem *dst, const void *src, size_t bytes)
+{
+ while (bytes) {
+ z_writel(*(const uint32_t *)src, dst);
+ src += 4;
+ dst += 4;
+ bytes -= 4;
+ }
+}
+
+static void xs100_write(struct net_device *dev, const void *src,
+ unsigned int count)
+{
+ struct ei_device *ei_local = netdev_priv(dev);
+ struct platform_device *pdev = to_platform_device(dev->dev.parent);
+ struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+ /* copy whole blocks */
+ while (count > XS100_8390_DATA_AREA_SIZE) {
+ z_memcpy_toio32(xs100->data_area +
+ XS100_8390_DATA_WRITE32_BASE, src,
+ XS100_8390_DATA_AREA_SIZE);
+ src += XS100_8390_DATA_AREA_SIZE;
+ count -= XS100_8390_DATA_AREA_SIZE;
+ }
+ /* copy whole dwords */
+ z_memcpy_toio32(xs100->data_area + XS100_8390_DATA_WRITE32_BASE,
+ src, count & ~3);
+ src += count & ~3;
+ if (count & 2) {
+ ei_outw(*(uint16_t *)src, ei_local->mem + NE_DATAPORT);
+ src += 2;
+ }
+ if (count & 1)
+ ei_outb(*(uint8_t *)src, ei_local->mem + NE_DATAPORT);
+}
+
+static void xs100_read(struct net_device *dev, void *dst, unsigned int count)
+{
+ struct ei_device *ei_local = netdev_priv(dev);
+ struct platform_device *pdev = to_platform_device(dev->dev.parent);
+ struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+ /* copy whole blocks */
+ while (count > XS100_8390_DATA_AREA_SIZE) {
+ z_memcpy_fromio32(dst, xs100->data_area +
+ XS100_8390_DATA_READ32_BASE,
+ XS100_8390_DATA_AREA_SIZE);
+ dst += XS100_8390_DATA_AREA_SIZE;
+ count -= XS100_8390_DATA_AREA_SIZE;
+ }
+ /* copy whole dwords */
+ z_memcpy_fromio32(dst, xs100->data_area + XS100_8390_DATA_READ32_BASE,
+ count & ~3);
+ dst += count & ~3;
+ if (count & 2) {
+ *(uint16_t *)dst = ei_inw(ei_local->mem + NE_DATAPORT);
+ dst += 2;
+ }
+ if (count & 1)
+ *(uint8_t *)dst = ei_inb(ei_local->mem + NE_DATAPORT);
+}
+
+/* Block input and output, similar to the Crynwr packet driver. If
+ * you are porting to a new ethercard, look at the packet driver
+ * source for hints. The NEx000 doesn't share the on-board packet
+ * memory -- you have to put the packet out through the "remote DMA"
+ * dataport using ei_outb.
+ */
+static void xs100_block_input(struct net_device *dev, int count,
+ struct sk_buff *skb, int ring_offset)
+{
+ struct ei_device *ei_local = netdev_priv(dev);
+ void __iomem *nic_base = ei_local->mem;
+ char *buf = skb->data;
+
+ if (ei_local->dmaing) {
+ netdev_err(dev,
+ "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+ __func__,
+ ei_local->dmaing, ei_local->irqlock);
+ return;
+ }
+
+ ei_local->dmaing |= 0x01;
+
+ ei_outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD);
+ ei_outb(count & 0xff, nic_base + EN0_RCNTLO);
+ ei_outb(count >> 8, nic_base + EN0_RCNTHI);
+ ei_outb(ring_offset & 0xff, nic_base + EN0_RSARLO);
+ ei_outb(ring_offset >> 8, nic_base + EN0_RSARHI);
+ ei_outb(E8390_RREAD + E8390_START, nic_base + NE_CMD);
+
+ xs100_read(dev, buf, count);
+
+ ei_local->dmaing &= ~1;
+}
+
+static void xs100_block_output(struct net_device *dev, int count,
+ const unsigned char *buf, const int start_page)
+{
+ struct ei_device *ei_local = netdev_priv(dev);
+ void __iomem *nic_base = ei_local->mem;
+ unsigned long dma_start;
+
+ /* Round the count up for word writes. Do we need to do this?
+ * What effect will an odd byte count have on the 8390? I
+ * should check someday.
+ */
+ if (ei_local->word16 && (count & 0x01))
+ count++;
+
+ /* This *shouldn't* happen. If it does, it's the last thing
+ * you'll see
+ */
+ if (ei_local->dmaing) {
+ netdev_err(dev,
+ "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+ __func__,
+ ei_local->dmaing, ei_local->irqlock);
+ return;
+ }
+
+ ei_local->dmaing |= 0x01;
+ /* We should already be in page 0, but to be safe... */
+ ei_outb(E8390_PAGE0 + E8390_START + E8390_NODMA, nic_base + NE_CMD);
+
+ ei_outb(ENISR_RDC, nic_base + EN0_ISR);
+
+ /* Now the normal output. */
+ ei_outb(count & 0xff, nic_base + EN0_RCNTLO);
+ ei_outb(count >> 8, nic_base + EN0_RCNTHI);
+ ei_outb(0x00, nic_base + EN0_RSARLO);
+ ei_outb(start_page, nic_base + EN0_RSARHI);
+
+ ei_outb(E8390_RWRITE + E8390_START, nic_base + NE_CMD);
+
+ xs100_write(dev, buf, count);
+
+ dma_start = jiffies;
+
+ while ((ei_inb(nic_base + EN0_ISR) & ENISR_RDC) == 0) {
+ if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */
+ netdev_warn(dev, "timeout waiting for Tx RDC.\n");
+ ei_local->reset_8390(dev);
+ ax_NS8390_init(dev, 1);
+ break;
+ }
+ }
+
+ ei_outb(ENISR_RDC, nic_base + EN0_ISR); /* Ack intr. */
+ ei_local->dmaing &= ~0x01;
+}
+
+static int xsurf100_probe(struct zorro_dev *zdev,
+ const struct zorro_device_id *ent)
+{
+ struct platform_device *pdev;
+ struct xsurf100_ax_plat_data ax88796_data;
+ struct resource res[2] = {
+ DEFINE_RES_NAMED(IRQ_AMIGA_PORTS, 1, NULL,
+ IORESOURCE_IRQ | IORESOURCE_IRQ_SHAREABLE),
+ DEFINE_RES_MEM(zdev->resource.start + XS100_8390_BASE,
+ 4 * 0x20)
+ };
+ int reg;
+ /* This table is referenced in the device structure, so it must
+ * outlive the scope of xsurf100_probe.
+ */
+ static u32 reg_offsets[32];
+ int ret = 0;
+
+ /* X-Surf 100 control and 32 bit ring buffer data access areas.
+ * These resources are not used by the ax88796 driver, so must
+ * be requested here and passed via platform data.
+ */
+
+ if (!request_mem_region(zdev->resource.start, 0x100, zdev->name)) {
+ dev_err(&zdev->dev, "cannot reserve X-Surf 100 control registers\n");
+ return -ENXIO;
+ }
+
+ if (!request_mem_region(zdev->resource.start +
+ XS100_8390_DATA32_BASE,
+ XS100_8390_DATA32_SIZE,
+ "X-Surf 100 32-bit data access")) {
+ dev_err(&zdev->dev, "cannot reserve 32-bit area\n");
+ ret = -ENXIO;
+ goto exit_req;
+ }
+
+ for (reg = 0; reg < 0x20; reg++)
+ reg_offsets[reg] = 4 * reg;
+
+ memset(&ax88796_data, 0, sizeof(ax88796_data));
+ ax88796_data.ax.flags = AXFLG_HAS_EEPROM;
+ ax88796_data.ax.wordlength = 2;
+ ax88796_data.ax.dcr_val = 0x48;
+ ax88796_data.ax.rcr_val = 0x40;
+ ax88796_data.ax.reg_offsets = reg_offsets;
+ ax88796_data.ax.check_irq = is_xsurf100_network_irq;
+ ax88796_data.base_regs = ioremap(zdev->resource.start, 0x100);
+
+ /* error handling for ioremap regs */
+ if (!ax88796_data.base_regs) {
+ dev_err(&zdev->dev, "Cannot ioremap area %pR (registers)\n",
+ &zdev->resource);
+
+ ret = -ENXIO;
+ goto exit_req2;
+ }
+
+ ax88796_data.data_area = ioremap(zdev->resource.start +
+ XS100_8390_DATA32_BASE, XS100_8390_DATA32_SIZE);
+
+ /* error handling for ioremap data */
+ if (!ax88796_data.data_area) {
+ dev_err(&zdev->dev,
+ "Cannot ioremap area %pR offset %x (32-bit access)\n",
+ &zdev->resource, XS100_8390_DATA32_BASE);
+
+ ret = -ENXIO;
+ goto exit_mem;
+ }
+
+ ax88796_data.ax.block_output = xs100_block_output;
+ ax88796_data.ax.block_input = xs100_block_input;
+
+ pdev = platform_device_register_resndata(&zdev->dev, "ax88796",
+ zdev->slotaddr, res, 2,
+ &ax88796_data,
+ sizeof(ax88796_data));
+
+ if (IS_ERR(pdev)) {
+ dev_err(&zdev->dev, "cannot register platform device\n");
+ ret = -ENXIO;
+ goto exit_mem2;
+ }
+
+ zorro_set_drvdata(zdev, pdev);
+
+ if (!ret)
+ return 0;
+
+ exit_mem2:
+ iounmap(ax88796_data.data_area);
+
+ exit_mem:
+ iounmap(ax88796_data.base_regs);
+
+ exit_req2:
+ release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE,
+ XS100_8390_DATA32_SIZE);
+
+ exit_req:
+ release_mem_region(zdev->resource.start, 0x100);
+
+ return ret;
+}
+
+static void xsurf100_remove(struct zorro_dev *zdev)
+{
+ struct platform_device *pdev = zorro_get_drvdata(zdev);
+ struct xsurf100_ax_plat_data *xs100 = dev_get_platdata(&pdev->dev);
+
+ platform_device_unregister(pdev);
+
+ iounmap(xs100->base_regs);
+ release_mem_region(zdev->resource.start, 0x100);
+ iounmap(xs100->data_area);
+ release_mem_region(zdev->resource.start + XS100_8390_DATA32_BASE,
+ XS100_8390_DATA32_SIZE);
+}
+
+static const struct zorro_device_id xsurf100_zorro_tbl[] = {
+ { ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF100, },
+ { 0 }
+};
+
+MODULE_DEVICE_TABLE(zorro, xsurf100_zorro_tbl);
+
+static struct zorro_driver xsurf100_driver = {
+ .name = "xsurf100",
+ .id_table = xsurf100_zorro_tbl,
+ .probe = xsurf100_probe,
+ .remove = xsurf100_remove,
+};
+
+module_driver(xsurf100_driver, zorro_register_driver, zorro_unregister_driver);
+
+MODULE_DESCRIPTION("X-Surf 100 driver");
+MODULE_AUTHOR("Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index c99e3e845ac0..a90080f12e67 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1074,16 +1074,12 @@ static int amd8111e_calc_coalesce(struct net_device *dev)
amd8111e_set_coalesce(dev,TX_INTR_COAL);
coal_conf->tx_coal_type = MEDIUM_COALESCE;
}
-
- }
- else if(tx_pkt_size >= 1024){
- if (tx_pkt_size >= 1024){
- if(coal_conf->tx_coal_type != HIGH_COALESCE){
- coal_conf->tx_timeout = 4;
- coal_conf->tx_event_count = 8;
- amd8111e_set_coalesce(dev,TX_INTR_COAL);
- coal_conf->tx_coal_type = HIGH_COALESCE;
- }
+ } else if (tx_pkt_size >= 1024) {
+ if (coal_conf->tx_coal_type != HIGH_COALESCE) {
+ coal_conf->tx_timeout = 4;
+ coal_conf->tx_event_count = 8;
+ amd8111e_set_coalesce(dev, TX_INTR_COAL);
+ coal_conf->tx_coal_type = HIGH_COALESCE;
}
}
}
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index f33b25fbca63..d5fca2e5a9bc 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -654,7 +654,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
pkts = priv->rx_max_coalesced_frames;
if (ec->use_adaptive_rx_coalesce && !priv->dim.use_dim) {
- moder = net_dim_get_def_profile(priv->dim.dim.mode);
+ moder = net_dim_get_def_rx_moderation(priv->dim.dim.mode);
usecs = moder.usec;
pkts = moder.pkts;
}
@@ -1064,7 +1064,7 @@ static void bcm_sysport_dim_work(struct work_struct *work)
struct bcm_sysport_priv *priv =
container_of(ndim, struct bcm_sysport_priv, dim);
struct net_dim_cq_moder cur_profile =
- net_dim_get_profile(dim->mode, dim->profile_ix);
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts);
dim->state = NET_DIM_START_MEASURE;
@@ -1437,7 +1437,7 @@ static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv)
/* If DIM was enabled, re-apply default parameters */
if (dim->use_dim) {
- moder = net_dim_get_def_profile(dim->dim.mode);
+ moder = net_dim_get_def_rx_moderation(dim->dim.mode);
usecs = moder.usec;
pkts = moder.pkts;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 7c560d545c03..5a779b19d149 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o
bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
+bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f83769d8047b..efe5c7203f60 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -62,6 +62,7 @@
#include "bnxt_vfr.h"
#include "bnxt_tc.h"
#include "bnxt_devlink.h"
+#include "bnxt_debugfs.h"
#define BNXT_TX_TIMEOUT (5 * HZ)
@@ -2383,6 +2384,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
for (i = 0, j = 0; i < bp->tx_nr_rings; i++) {
struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
struct bnxt_ring_struct *ring;
+ u8 qidx;
ring = &txr->tx_ring_struct;
@@ -2411,7 +2413,8 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
}
- ring->queue_id = bp->q_info[j].queue_id;
+ qidx = bp->tc_to_qidx[j];
+ ring->queue_id = bp->q_info[qidx].queue_id;
if (i < bp->tx_nr_rings_xdp)
continue;
if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -3493,15 +3496,29 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
if (!timeout)
timeout = DFLT_HWRM_CMD_TIMEOUT;
+ /* convert timeout to usec */
+ timeout *= 1000;
i = 0;
- tmo_count = timeout * 40;
+ /* Short timeout for the first few iterations:
+ * number of loops = number of loops for short timeout +
+ * number of loops for standard timeout.
+ */
+ tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
+ timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
+ tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
if (intr_process) {
/* Wait until hwrm response cmpl interrupt is processed */
while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID &&
i++ < tmo_count) {
- usleep_range(25, 40);
+ /* on first few passes, just barely sleep */
+ if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+ usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+ HWRM_SHORT_MAX_TIMEOUT);
+ else
+ usleep_range(HWRM_MIN_TIMEOUT,
+ HWRM_MAX_TIMEOUT);
}
if (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID) {
@@ -3519,7 +3536,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
HWRM_RESP_LEN_SFT;
if (len)
break;
- usleep_range(25, 40);
+ /* on first few passes, just barely sleep */
+ if (i < DFLT_HWRM_CMD_TIMEOUT)
+ usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+ HWRM_SHORT_MAX_TIMEOUT);
+ else
+ usleep_range(HWRM_MIN_TIMEOUT,
+ HWRM_MAX_TIMEOUT);
}
if (i >= tmo_count) {
@@ -4334,26 +4357,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
mutex_unlock(&bp->hwrm_cmd_lock);
if (rc || err) {
- switch (ring_type) {
- case RING_FREE_REQ_RING_TYPE_L2_CMPL:
- netdev_err(bp->dev, "hwrm_ring_alloc cp failed. rc:%x err:%x\n",
- rc, err);
- return -1;
-
- case RING_FREE_REQ_RING_TYPE_RX:
- netdev_err(bp->dev, "hwrm_ring_alloc rx failed. rc:%x err:%x\n",
- rc, err);
- return -1;
-
- case RING_FREE_REQ_RING_TYPE_TX:
- netdev_err(bp->dev, "hwrm_ring_alloc tx failed. rc:%x err:%x\n",
- rc, err);
- return -1;
-
- default:
- netdev_err(bp->dev, "Invalid ring\n");
- return -1;
- }
+ netdev_err(bp->dev, "hwrm_ring_alloc type %d failed. rc:%x err:%x\n",
+ ring_type, rc, err);
+ return -EIO;
}
ring->fw_ring_id = ring_id;
return rc;
@@ -4477,23 +4483,9 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
mutex_unlock(&bp->hwrm_cmd_lock);
if (rc || error_code) {
- switch (ring_type) {
- case RING_FREE_REQ_RING_TYPE_L2_CMPL:
- netdev_err(bp->dev, "hwrm_ring_free cp failed. rc:%d\n",
- rc);
- return rc;
- case RING_FREE_REQ_RING_TYPE_RX:
- netdev_err(bp->dev, "hwrm_ring_free rx failed. rc:%d\n",
- rc);
- return rc;
- case RING_FREE_REQ_RING_TYPE_TX:
- netdev_err(bp->dev, "hwrm_ring_free tx failed. rc:%d\n",
- rc);
- return rc;
- default:
- netdev_err(bp->dev, "Invalid ring\n");
- return -1;
- }
+ netdev_err(bp->dev, "hwrm_ring_free type %d failed. rc:%x err:%x\n",
+ ring_type, rc, error_code);
+ return -EIO;
}
return 0;
}
@@ -4721,6 +4713,10 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
cp_rings, vnics);
+ req.enables |= cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
+ FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS);
+ req.num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
+ req.num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
return -ENOMEM;
@@ -5309,6 +5305,7 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
for (i = 0; i < bp->max_tc; i++) {
bp->q_info[i].queue_id = *qptr++;
bp->q_info[i].queue_profile = *qptr++;
+ bp->tc_to_qidx[i] = i;
}
qportcfg_exit:
@@ -5376,7 +5373,8 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp)
struct tm tm;
time64_t now = ktime_get_real_seconds();
- if (bp->hwrm_spec_code < 0x10400)
+ if ((BNXT_VF(bp) && bp->hwrm_spec_code < 0x10901) ||
+ bp->hwrm_spec_code < 0x10400)
return -EOPNOTSUPP;
time64_to_tm(now, 0, &tm);
@@ -5958,6 +5956,9 @@ static int bnxt_init_msix(struct bnxt *bp)
if (total_vecs > max)
total_vecs = max;
+ if (!total_vecs)
+ return 0;
+
msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL);
if (!msix_ent)
return -ENOMEM;
@@ -6843,6 +6844,8 @@ static void bnxt_preset_reg_win(struct bnxt *bp)
}
}
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp);
+
static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
{
int rc = 0;
@@ -6850,6 +6853,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
bnxt_preset_reg_win(bp);
netif_carrier_off(bp->dev);
if (irq_re_init) {
+ /* Reserve rings now if none were reserved at driver probe. */
+ rc = bnxt_init_dflt_ring_mode(bp);
+ if (rc) {
+ netdev_err(bp->dev, "Failed to reserve default rings at open\n");
+ return rc;
+ }
rc = bnxt_reserve_rings(bp);
if (rc)
return rc;
@@ -6877,6 +6886,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
}
bnxt_enable_napi(bp);
+ bnxt_debug_dev_init(bp);
rc = bnxt_init_nic(bp, irq_re_init);
if (rc) {
@@ -6909,6 +6919,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
return 0;
open_err:
+ bnxt_debug_dev_exit(bp);
bnxt_disable_napi(bp);
bnxt_del_napi(bp);
@@ -7002,6 +7013,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
/* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */
+ bnxt_debug_dev_exit(bp);
bnxt_disable_napi(bp);
del_timer_sync(&bp->timer);
bnxt_free_skbs(bp);
@@ -7279,6 +7291,25 @@ skip_uc:
return rc;
}
+static bool bnxt_can_reserve_rings(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+ if ((bp->flags & BNXT_FLAG_NEW_RM) && BNXT_VF(bp)) {
+ struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+
+ /* No minimum rings were provisioned by the PF. Don't
+ * reserve rings by default when device is down.
+ */
+ if (hw_resc->min_tx_rings || hw_resc->resv_tx_rings)
+ return true;
+
+ if (!netif_running(bp->dev))
+ return false;
+ }
+#endif
+ return true;
+}
+
/* If the chip and firmware supports RFS */
static bool bnxt_rfs_supported(struct bnxt *bp)
{
@@ -7295,7 +7326,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
#ifdef CONFIG_RFS_ACCEL
int vnics, max_vnics, max_rss_ctxs;
- if (!(bp->flags & BNXT_FLAG_MSIX_CAP))
+ if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
return false;
vnics = 1 + bp->rx_nr_rings;
@@ -7729,7 +7760,7 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
coal->coal_bufs = 30;
coal->coal_ticks_irq = 1;
coal->coal_bufs_irq = 2;
- coal->idle_thresh = 25;
+ coal->idle_thresh = 50;
coal->bufs_per_record = 2;
coal->budget = 64; /* NAPI budget */
@@ -8529,6 +8560,9 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
{
int dflt_rings, max_rx_rings, max_tx_rings, rc;
+ if (!bnxt_can_reserve_rings(bp))
+ return 0;
+
if (sh)
bp->flags |= BNXT_FLAG_SHARED_RINGS;
dflt_rings = netif_get_num_default_rss_queues();
@@ -8574,6 +8608,29 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
return rc;
}
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
+{
+ int rc;
+
+ if (bp->tx_nr_rings)
+ return 0;
+
+ rc = bnxt_set_dflt_rings(bp, true);
+ if (rc) {
+ netdev_err(bp->dev, "Not enough rings available.\n");
+ return rc;
+ }
+ rc = bnxt_init_int_mode(bp);
+ if (rc)
+ return rc;
+ bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+ if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
+ bp->flags |= BNXT_FLAG_RFS;
+ bp->dev->features |= NETIF_F_NTUPLE;
+ }
+ return 0;
+}
+
int bnxt_restore_pf_fw_resources(struct bnxt *bp)
{
int rc;
@@ -9078,6 +9135,7 @@ static struct pci_driver bnxt_pci_driver = {
static int __init bnxt_init(void)
{
+ bnxt_debug_init();
return pci_register_driver(&bnxt_pci_driver);
}
@@ -9086,6 +9144,7 @@ static void __exit bnxt_exit(void)
pci_unregister_driver(&bnxt_pci_driver);
if (bnxt_pf_wq)
destroy_workqueue(bnxt_pf_wq);
+ bnxt_debug_exit();
}
module_init(bnxt_init);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 3d55d3b56865..8df1d8b9d2e3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -532,6 +532,12 @@ struct rx_tpa_end_cmp_ext {
#define BNXT_HWRM_REQ_MAX_SIZE 128
#define BNXT_HWRM_REQS_PER_PAGE (BNXT_PAGE_SIZE / \
BNXT_HWRM_REQ_MAX_SIZE)
+#define HWRM_SHORT_MIN_TIMEOUT 3
+#define HWRM_SHORT_MAX_TIMEOUT 10
+#define HWRM_SHORT_TIMEOUT_COUNTER 5
+
+#define HWRM_MIN_TIMEOUT 25
+#define HWRM_MAX_TIMEOUT 40
#define BNXT_RX_EVENT 1
#define BNXT_AGG_EVENT 2
@@ -1242,6 +1248,7 @@ struct bnxt {
u8 max_tc;
u8 max_lltc; /* lossless TCs */
struct bnxt_queue_info q_info[BNXT_MAX_QUEUE];
+ u8 tc_to_qidx[BNXT_MAX_QUEUE];
unsigned int current_interval;
#define BNXT_TIMER_INTERVAL HZ
@@ -1384,6 +1391,8 @@ struct bnxt {
u16 *cfa_code_map; /* cfa_code -> vf_idx map */
u8 switch_id[8];
struct bnxt_tc_info *tc_info;
+ struct dentry *debugfs_pdev;
+ struct dentry *debugfs_dim;
};
#define BNXT_RX_STATS_OFFSET(counter) \
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 3c746f2d9ed8..d5bc72cecde3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -21,6 +21,21 @@
#include "bnxt_dcb.h"
#ifdef CONFIG_BNXT_DCB
+static int bnxt_queue_to_tc(struct bnxt *bp, u8 queue_id)
+{
+ int i, j;
+
+ for (i = 0; i < bp->max_tc; i++) {
+ if (bp->q_info[i].queue_id == queue_id) {
+ for (j = 0; j < bp->max_tc; j++) {
+ if (bp->tc_to_qidx[j] == i)
+ return j;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
{
struct hwrm_queue_pri2cos_cfg_input req = {0};
@@ -33,10 +48,13 @@ static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
pri2cos = &req.pri0_cos_queue_id;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ u8 qidx;
+
req.enables |= cpu_to_le32(
QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i);
- pri2cos[i] = bp->q_info[ets->prio_tc[i]].queue_id;
+ qidx = bp->tc_to_qidx[ets->prio_tc[i]];
+ pri2cos[i] = bp->q_info[qidx].queue_id;
}
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
return rc;
@@ -55,17 +73,15 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (!rc) {
u8 *pri2cos = &resp->pri0_cos_queue_id;
- int i, j;
+ int i;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
u8 queue_id = pri2cos[i];
+ int tc;
- for (j = 0; j < bp->max_tc; j++) {
- if (bp->q_info[j].queue_id == queue_id) {
- ets->prio_tc[i] = j;
- break;
- }
- }
+ tc = bnxt_queue_to_tc(bp, queue_id);
+ if (tc >= 0)
+ ets->prio_tc[i] = tc;
}
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -81,13 +97,15 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
void *data;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
- data = &req.unused_0;
- for (i = 0; i < max_tc; i++, data += sizeof(cos2bw) - 4) {
+ for (i = 0; i < max_tc; i++) {
+ u8 qidx;
+
req.enables |= cpu_to_le32(
QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
memset(&cos2bw, 0, sizeof(cos2bw));
- cos2bw.queue_id = bp->q_info[i].queue_id;
+ qidx = bp->tc_to_qidx[i];
+ cos2bw.queue_id = bp->q_info[qidx].queue_id;
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
cos2bw.tsa =
QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP;
@@ -103,8 +121,9 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
cpu_to_le32((ets->tc_tx_bw[i] * 100) |
BW_VALUE_UNIT_PERCENT1_100);
}
+ data = &req.unused_0 + qidx * (sizeof(cos2bw) - 4);
memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
- if (i == 0) {
+ if (qidx == 0) {
req.queue_id0 = cos2bw.queue_id;
req.unused_0 = 0;
}
@@ -132,66 +151,81 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id);
for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) {
- int j;
+ int tc;
memcpy(&cos2bw.queue_id, data, sizeof(cos2bw) - 4);
if (i == 0)
cos2bw.queue_id = resp->queue_id0;
- for (j = 0; j < bp->max_tc; j++) {
- if (bp->q_info[j].queue_id != cos2bw.queue_id)
- continue;
- if (cos2bw.tsa ==
- QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
- ets->tc_tsa[j] = IEEE_8021QAZ_TSA_STRICT;
- } else {
- ets->tc_tsa[j] = IEEE_8021QAZ_TSA_ETS;
- ets->tc_tx_bw[j] = cos2bw.bw_weight;
- }
+ tc = bnxt_queue_to_tc(bp, cos2bw.queue_id);
+ if (tc < 0)
+ continue;
+
+ if (cos2bw.tsa ==
+ QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
+ ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_STRICT;
+ } else {
+ ets->tc_tsa[tc] = IEEE_8021QAZ_TSA_ETS;
+ ets->tc_tx_bw[tc] = cos2bw.bw_weight;
}
}
mutex_unlock(&bp->hwrm_cmd_lock);
return 0;
}
-static int bnxt_hwrm_queue_cfg(struct bnxt *bp, unsigned int lltc_mask)
+static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
{
- struct hwrm_queue_cfg_input req = {0};
- int i;
+ unsigned long qmap = 0;
+ int max = bp->max_tc;
+ int i, j, rc;
- if (netif_running(bp->dev))
- bnxt_tx_disable(bp);
+ /* Assign lossless TCs first */
+ for (i = 0, j = 0; i < max; ) {
+ if (lltc_mask & (1 << i)) {
+ if (BNXT_LLQ(bp->q_info[j].queue_profile)) {
+ bp->tc_to_qidx[i] = j;
+ __set_bit(j, &qmap);
+ i++;
+ }
+ j++;
+ continue;
+ }
+ i++;
+ }
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_CFG, -1, -1);
- req.flags = cpu_to_le32(QUEUE_CFG_REQ_FLAGS_PATH_BIDIR);
- req.enables = cpu_to_le32(QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE);
+ for (i = 0, j = 0; i < max; i++) {
+ if (lltc_mask & (1 << i))
+ continue;
+ j = find_next_zero_bit(&qmap, max, j);
+ bp->tc_to_qidx[i] = j;
+ __set_bit(j, &qmap);
+ j++;
+ }
- /* Configure lossless queues to lossy first */
- req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
- for (i = 0; i < bp->max_tc; i++) {
- if (BNXT_LLQ(bp->q_info[i].queue_profile)) {
- req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
- hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
- bp->q_info[i].queue_profile =
- QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
+ if (netif_running(bp->dev)) {
+ bnxt_close_nic(bp, false, false);
+ rc = bnxt_open_nic(bp, false, false);
+ if (rc) {
+ netdev_warn(bp->dev, "failed to open NIC, rc = %d\n", rc);
+ return rc;
}
}
-
- /* Now configure desired queues to lossless */
- req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
- for (i = 0; i < bp->max_tc; i++) {
- if (lltc_mask & (1 << i)) {
- req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
- hwrm_send_message(bp, &req, sizeof(req),
- HWRM_CMD_TIMEOUT);
- bp->q_info[i].queue_profile =
- QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
+ if (bp->ieee_ets) {
+ int tc = netdev_get_num_tc(bp->dev);
+
+ if (!tc)
+ tc = 1;
+ rc = bnxt_hwrm_queue_cos2bw_cfg(bp, bp->ieee_ets, tc);
+ if (rc) {
+ netdev_warn(bp->dev, "failed to config BW, rc = %d\n", rc);
+ return rc;
+ }
+ rc = bnxt_hwrm_queue_pri2cos_cfg(bp, bp->ieee_ets);
+ if (rc) {
+ netdev_warn(bp->dev, "failed to config prio, rc = %d\n", rc);
+ return rc;
}
}
- if (netif_running(bp->dev))
- bnxt_tx_enable(bp);
-
return 0;
}
@@ -201,7 +235,7 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
struct ieee_ets *my_ets = bp->ieee_ets;
unsigned int tc_mask = 0, pri_mask = 0;
u8 i, pri, lltc_count = 0;
- bool need_q_recfg = false;
+ bool need_q_remap = false;
int rc;
if (!my_ets)
@@ -221,21 +255,25 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
if (lltc_count > bp->max_lltc)
return -EINVAL;
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
- req.flags = cpu_to_le32(pri_mask);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
- if (rc)
- return rc;
-
for (i = 0; i < bp->max_tc; i++) {
if (tc_mask & (1 << i)) {
- if (!BNXT_LLQ(bp->q_info[i].queue_profile))
- need_q_recfg = true;
+ u8 qidx = bp->tc_to_qidx[i];
+
+ if (!BNXT_LLQ(bp->q_info[qidx].queue_profile)) {
+ need_q_remap = true;
+ break;
+ }
}
}
- if (need_q_recfg)
- rc = bnxt_hwrm_queue_cfg(bp, tc_mask);
+ if (need_q_remap)
+ rc = bnxt_queue_remap(bp, tc_mask);
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
+ req.flags = cpu_to_le32(pri_mask);
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ return rc;
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
new file mode 100644
index 000000000000..94e208e9789f
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
@@ -0,0 +1,124 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "bnxt_hsi.h"
+#include <linux/net_dim.h>
+#include "bnxt.h"
+#include "bnxt_debugfs.h"
+
+static struct dentry *bnxt_debug_mnt;
+
+static ssize_t debugfs_dim_read(struct file *filep,
+ char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct net_dim *dim = filep->private_data;
+ int len;
+ char *buf;
+
+ if (*ppos)
+ return 0;
+ if (!dim)
+ return -ENODEV;
+ buf = kasprintf(GFP_KERNEL,
+ "state = %d\n" \
+ "profile_ix = %d\n" \
+ "mode = %d\n" \
+ "tune_state = %d\n" \
+ "steps_right = %d\n" \
+ "steps_left = %d\n" \
+ "tired = %d\n",
+ dim->state,
+ dim->profile_ix,
+ dim->mode,
+ dim->tune_state,
+ dim->steps_right,
+ dim->steps_left,
+ dim->tired);
+ if (!buf)
+ return -ENOMEM;
+ if (count < strlen(buf)) {
+ kfree(buf);
+ return -ENOSPC;
+ }
+ len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+ kfree(buf);
+ return len;
+}
+
+static const struct file_operations debugfs_dim_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = debugfs_dim_read,
+};
+
+static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx,
+ struct dentry *dd)
+{
+ static char qname[16];
+
+ snprintf(qname, 10, "%d", ring_idx);
+ return debugfs_create_file(qname, 0600, dd,
+ dim, &debugfs_dim_fops);
+}
+
+void bnxt_debug_dev_init(struct bnxt *bp)
+{
+ const char *pname = pci_name(bp->pdev);
+ struct dentry *pdevf;
+ int i;
+
+ bp->debugfs_pdev = debugfs_create_dir(pname, bnxt_debug_mnt);
+ if (bp->debugfs_pdev) {
+ pdevf = debugfs_create_dir("dim", bp->debugfs_pdev);
+ if (!pdevf) {
+ pr_err("failed to create debugfs entry %s/dim\n",
+ pname);
+ return;
+ }
+ bp->debugfs_dim = pdevf;
+ /* create files for each rx ring */
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+
+ if (cpr && bp->bnapi[i]->rx_ring) {
+ pdevf = debugfs_dim_ring_init(&cpr->dim, i,
+ bp->debugfs_dim);
+ if (!pdevf)
+ pr_err("failed to create debugfs entry %s/dim/%d\n",
+ pname, i);
+ }
+ }
+ } else {
+ pr_err("failed to create debugfs entry %s\n", pname);
+ }
+}
+
+void bnxt_debug_dev_exit(struct bnxt *bp)
+{
+ if (bp) {
+ debugfs_remove_recursive(bp->debugfs_pdev);
+ bp->debugfs_pdev = NULL;
+ }
+}
+
+void bnxt_debug_init(void)
+{
+ bnxt_debug_mnt = debugfs_create_dir("bnxt_en", NULL);
+ if (!bnxt_debug_mnt)
+ pr_err("failed to init bnxt_en debugfs\n");
+}
+
+void bnxt_debug_exit(void)
+{
+ debugfs_remove_recursive(bnxt_debug_mnt);
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h
new file mode 100644
index 000000000000..d0bb4887acd0
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.h
@@ -0,0 +1,23 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017-2018 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+
+#ifdef CONFIG_DEBUG_FS
+void bnxt_debug_init(void);
+void bnxt_debug_exit(void);
+void bnxt_debug_dev_init(struct bnxt *bp);
+void bnxt_debug_dev_exit(struct bnxt *bp);
+#else
+static inline void bnxt_debug_init(void) {}
+static inline void bnxt_debug_exit(void) {}
+static inline void bnxt_debug_dev_init(struct bnxt *bp) {}
+static inline void bnxt_debug_dev_exit(struct bnxt *bp) {}
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
index 408dd190331e..afa97c8bb081 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
@@ -21,11 +21,11 @@ void bnxt_dim_work(struct work_struct *work)
struct bnxt_napi *bnapi = container_of(cpr,
struct bnxt_napi,
cp_ring);
- struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode,
- dim->profile_ix);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
- cpr->rx_ring_coal.coal_ticks = cur_profile.usec;
- cpr->rx_ring_coal.coal_bufs = cur_profile.pkts;
+ cpr->rx_ring_coal.coal_ticks = cur_moder.usec;
+ cpr->rx_ring_coal.coal_bufs = cur_moder.pkts;
bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi);
dim->state = NET_DIM_START_MEASURE;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 8ba14ae00e8f..ad98b78f5aa1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -140,6 +140,19 @@ reset_coalesce:
#define BNXT_RX_STATS_EXT_ENTRY(counter) \
{ BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) }
+enum {
+ RX_TOTAL_DISCARDS,
+ TX_TOTAL_DISCARDS,
+};
+
+static struct {
+ u64 counter;
+ char string[ETH_GSTRING_LEN];
+} bnxt_sw_func_stats[] = {
+ {0, "rx_total_discard_pkts"},
+ {0, "tx_total_discard_pkts"},
+};
+
static const struct {
long offset;
char string[ETH_GSTRING_LEN];
@@ -237,6 +250,7 @@ static const struct {
BNXT_RX_STATS_EXT_ENTRY(resume_roce_pause_events),
};
+#define BNXT_NUM_SW_FUNC_STATS ARRAY_SIZE(bnxt_sw_func_stats)
#define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
#define BNXT_NUM_PORT_STATS_EXT ARRAY_SIZE(bnxt_port_stats_ext_arr)
@@ -244,6 +258,8 @@ static int bnxt_get_num_stats(struct bnxt *bp)
{
int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+ num_stats += BNXT_NUM_SW_FUNC_STATS;
+
if (bp->flags & BNXT_FLAG_PORT_STATS)
num_stats += BNXT_NUM_PORT_STATS;
@@ -279,6 +295,9 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
if (!bp->bnapi)
return;
+ for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++)
+ bnxt_sw_func_stats[i].counter = 0;
+
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -288,7 +307,16 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
for (k = 0; k < stat_fields; j++, k++)
buf[j] = le64_to_cpu(hw_stats[k]);
buf[j++] = cpr->rx_l4_csum_errors;
+
+ bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
+ le64_to_cpu(cpr->hw_stats->rx_discard_pkts);
+ bnxt_sw_func_stats[TX_TOTAL_DISCARDS].counter +=
+ le64_to_cpu(cpr->hw_stats->tx_discard_pkts);
}
+
+ for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
+ buf[j] = bnxt_sw_func_stats[i].counter;
+
if (bp->flags & BNXT_FLAG_PORT_STATS) {
__le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
@@ -359,6 +387,11 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
sprintf(buf, "[%d]: rx_l4_csum_errors", i);
buf += ETH_GSTRING_LEN;
}
+ for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
+ strcpy(buf, bnxt_sw_func_stats[i].string);
+ buf += ETH_GSTRING_LEN;
+ }
+
if (bp->flags & BNXT_FLAG_PORT_STATS) {
for (i = 0; i < BNXT_NUM_PORT_STATS; i++) {
strcpy(buf, bnxt_port_stats_arr[i].string);
@@ -551,6 +584,8 @@ static int bnxt_set_channels(struct net_device *dev,
* to renable
*/
}
+ } else {
+ rc = bnxt_reserve_rings(bp);
}
return rc;
@@ -1785,6 +1820,11 @@ static int nvm_get_dir_info(struct net_device *dev, u32 *entries, u32 *length)
static int bnxt_get_eeprom_len(struct net_device *dev)
{
+ struct bnxt *bp = netdev_priv(dev);
+
+ if (BNXT_VF(bp))
+ return 0;
+
/* The -1 return value allows the entire 32-bit range of offsets to be
* passed via the ethtool command-line utility.
*/
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index f952963d594e..cc21d874eb6e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -462,13 +462,13 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
vf_vnics = hw_resc->max_vnics - bp->nr_vnics;
vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
- req.min_rsscos_ctx = cpu_to_le16(1);
- req.max_rsscos_ctx = cpu_to_le16(1);
+ req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
+ req.max_rsscos_ctx = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL) {
req.min_cmpl_rings = cpu_to_le16(1);
req.min_tx_rings = cpu_to_le16(1);
req.min_rx_rings = cpu_to_le16(1);
- req.min_l2_ctxs = cpu_to_le16(1);
+ req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MIN_L2_CTX);
req.min_vnics = cpu_to_le16(1);
req.min_stat_ctx = cpu_to_le16(1);
req.min_hw_ring_grps = cpu_to_le16(1);
@@ -483,7 +483,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
req.min_cmpl_rings = cpu_to_le16(vf_cp_rings);
req.min_tx_rings = cpu_to_le16(vf_tx_rings);
req.min_rx_rings = cpu_to_le16(vf_rx_rings);
- req.min_l2_ctxs = cpu_to_le16(4);
+ req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
req.min_vnics = cpu_to_le16(vf_vnics);
req.min_stat_ctx = cpu_to_le16(vf_stat_ctx);
req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
@@ -491,7 +491,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
req.max_cmpl_rings = cpu_to_le16(vf_cp_rings);
req.max_tx_rings = cpu_to_le16(vf_tx_rings);
req.max_rx_rings = cpu_to_le16(vf_rx_rings);
- req.max_l2_ctxs = cpu_to_le16(4);
+ req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
req.max_vnics = cpu_to_le16(vf_vnics);
req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
@@ -809,6 +809,9 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
struct hwrm_fwd_resp_input req = {0};
struct hwrm_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
+ if (BNXT_FWD_RESP_SIZE_ERR(msg_size))
+ return -EINVAL;
+
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_RESP, -1, -1);
/* Set the new target id */
@@ -845,6 +848,9 @@ static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
struct hwrm_reject_fwd_resp_input req = {0};
struct hwrm_reject_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
+ if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size))
+ return -EINVAL;
+
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_REJECT_FWD_RESP, -1, -1);
/* Set the new target id */
req.target_id = cpu_to_le16(vf->fw_fid);
@@ -877,6 +883,9 @@ static int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
struct hwrm_exec_fwd_resp_input req = {0};
struct hwrm_exec_fwd_resp_output *resp = bp->hwrm_cmd_resp_addr;
+ if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size))
+ return -EINVAL;
+
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_EXEC_FWD_RESP, -1, -1);
/* Set the new target id */
req.target_id = cpu_to_le16(vf->fw_fid);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index d10f6f6c7860..e9b20cd19881 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -11,6 +11,23 @@
#ifndef BNXT_SRIOV_H
#define BNXT_SRIOV_H
+#define BNXT_FWD_RESP_SIZE_ERR(n) \
+ ((offsetof(struct hwrm_fwd_resp_input, encap_resp) + n) > \
+ sizeof(struct hwrm_fwd_resp_input))
+
+#define BNXT_EXEC_FWD_RESP_SIZE_ERR(n) \
+ ((offsetof(struct hwrm_exec_fwd_resp_input, encap_request) + n) >\
+ offsetof(struct hwrm_exec_fwd_resp_input, encap_resp_target_id))
+
+#define BNXT_REJ_FWD_RESP_SIZE_ERR(n) \
+ ((offsetof(struct hwrm_reject_fwd_resp_input, encap_request) + n) >\
+ offsetof(struct hwrm_reject_fwd_resp_input, encap_resp_target_id))
+
+#define BNXT_VF_MIN_RSS_CTX 1
+#define BNXT_VF_MAX_RSS_CTX 1
+#define BNXT_VF_MIN_L2_CTX 1
+#define BNXT_VF_MAX_L2_CTX 4
+
int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
int bnxt_set_vf_mac(struct net_device *, int, u8 *);
int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 1389ab5e05df..1f0e872d0667 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
if (tx_avail != bp->tx_ring_size)
*event &= ~BNXT_RX_EVENT;
+ *len = xdp.data_end - xdp.data;
if (orig_data != xdp.data) {
offset = xdp.data - xdp.data_hard_start;
*data_ptr = xdp.data_hard_start + offset;
- *len = xdp.data_end - xdp.data;
}
switch (act) {
case XDP_PASS:
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 0445f2c0c629..20c1681bb1af 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -652,7 +652,7 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
pkts = ring->rx_max_coalesced_frames;
if (ec->use_adaptive_rx_coalesce && !ring->dim.use_dim) {
- moder = net_dim_get_def_profile(ring->dim.dim.mode);
+ moder = net_dim_get_def_rx_moderation(ring->dim.dim.mode);
usecs = moder.usec;
pkts = moder.pkts;
}
@@ -1925,7 +1925,7 @@ static void bcmgenet_dim_work(struct work_struct *work)
struct bcmgenet_rx_ring *ring =
container_of(ndim, struct bcmgenet_rx_ring, dim);
struct net_dim_cq_moder cur_profile =
- net_dim_get_profile(dim->mode, dim->profile_ix);
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts);
dim->state = NET_DIM_START_MEASURE;
@@ -2102,7 +2102,7 @@ static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
/* If DIM was enabled, re-apply default parameters */
if (dim->use_dim) {
- moder = net_dim_get_def_profile(dim->dim.mode);
+ moder = net_dim_get_def_rx_moderation(dim->dim.mode);
usecs = moder.usec;
pkts = moder.pkts;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index e8b290473ee2..929d485a3a2f 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -1245,7 +1245,7 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct)
CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
}
-static int cn23xx_sriov_config(struct octeon_device *oct)
+int cn23xx_sriov_config(struct octeon_device *oct)
{
struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
u32 max_rings, total_rings, max_vfs, rings_per_vf;
@@ -1269,8 +1269,8 @@ static int cn23xx_sriov_config(struct octeon_device *oct)
break;
}
- if (max_rings <= num_present_cpus())
- num_pf_rings = 1;
+ if (oct->sriov_info.num_pf_rings)
+ num_pf_rings = oct->sriov_info.num_pf_rings;
else
num_pf_rings = num_present_cpus();
@@ -1497,3 +1497,57 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
octeon_mbox_write(oct, &mbox_cmd);
}
}
+
+static void
+cn23xx_get_vf_stats_callback(struct octeon_device *oct,
+ struct octeon_mbox_cmd *cmd, void *arg)
+{
+ struct oct_vf_stats_ctx *ctx = arg;
+
+ memcpy(ctx->stats, cmd->data, sizeof(struct oct_vf_stats));
+ atomic_set(&ctx->status, 1);
+}
+
+int cn23xx_get_vf_stats(struct octeon_device *oct, int vfidx,
+ struct oct_vf_stats *stats)
+{
+ u32 timeout = HZ; // 1sec
+ struct octeon_mbox_cmd mbox_cmd;
+ struct oct_vf_stats_ctx ctx;
+ u32 count = 0, ret;
+
+ if (!(oct->sriov_info.vf_drv_loaded_mask & (1ULL << vfidx)))
+ return -1;
+
+ if (sizeof(struct oct_vf_stats) > sizeof(mbox_cmd.data))
+ return -1;
+
+ mbox_cmd.msg.u64 = 0;
+ mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+ mbox_cmd.msg.s.resp_needed = 1;
+ mbox_cmd.msg.s.cmd = OCTEON_GET_VF_STATS;
+ mbox_cmd.msg.s.len = 1;
+ mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf;
+ mbox_cmd.recv_len = 0;
+ mbox_cmd.recv_status = 0;
+ mbox_cmd.fn = (octeon_mbox_callback_t)cn23xx_get_vf_stats_callback;
+ ctx.stats = stats;
+ atomic_set(&ctx.status, 0);
+ mbox_cmd.fn_arg = (void *)&ctx;
+ memset(mbox_cmd.data, 0, sizeof(mbox_cmd.data));
+ octeon_mbox_write(oct, &mbox_cmd);
+
+ do {
+ schedule_timeout_uninterruptible(1);
+ } while ((atomic_read(&ctx.status) == 0) && (count++ < timeout));
+
+ ret = atomic_read(&ctx.status);
+ if (ret == 0) {
+ octeon_mbox_cancel(oct, 0);
+ dev_err(&oct->pci_dev->dev, "Unable to get stats from VF-%d, timedout\n",
+ vfidx);
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
index 2aba5247b6d8..e6f31d0d5c0b 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -43,6 +43,15 @@ struct octeon_cn23xx_pf {
#define CN23XX_SLI_DEF_BP 0x40
+struct oct_vf_stats {
+ u64 rx_packets;
+ u64 tx_packets;
+ u64 rx_bytes;
+ u64 tx_bytes;
+ u64 broadcast;
+ u64 multicast;
+};
+
int setup_cn23xx_octeon_pf_device(struct octeon_device *oct);
int validate_cn23xx_pf_config_info(struct octeon_device *oct,
@@ -52,8 +61,13 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
+int cn23xx_sriov_config(struct octeon_device *oct);
+
int cn23xx_fw_loaded(struct octeon_device *oct);
void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
u8 *mac);
+
+int cn23xx_get_vf_stats(struct octeon_device *oct, int ifidx,
+ struct oct_vf_stats *stats);
#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 2a94eee943b2..6821afcdc365 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -29,6 +29,162 @@
/* OOM task polling interval */
#define LIO_OOM_POLL_INTERVAL_MS 250
+#define OCTNIC_MAX_SG MAX_SKB_FRAGS
+
+/**
+ * \brief Callback for getting interface configuration
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+void lio_if_cfg_callback(struct octeon_device *oct,
+ u32 status __attribute__((unused)), void *buf)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+ struct liquidio_if_cfg_context *ctx;
+ struct liquidio_if_cfg_resp *resp;
+
+ resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+ oct = lio_get_device(ctx->octeon_id);
+ if (resp->status)
+ dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
+ CVM_CAST64(resp->status));
+ WRITE_ONCE(ctx->cond, 1);
+
+ snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
+ resp->cfg_info.liquidio_firmware_version);
+
+ /* This barrier is required to be sure that the response has been
+ * written fully before waking up the handler
+ */
+ wmb();
+
+ wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Delete gather lists
+ * @param lio per-network private data
+ */
+void lio_delete_glists(struct lio *lio)
+{
+ struct octnic_gather *g;
+ int i;
+
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+
+ if (!lio->glist)
+ return;
+
+ for (i = 0; i < lio->oct_dev->num_iqs; i++) {
+ do {
+ g = (struct octnic_gather *)
+ lio_list_delete_head(&lio->glist[i]);
+ kfree(g);
+ } while (g);
+
+ if (lio->glists_virt_base && lio->glists_virt_base[i] &&
+ lio->glists_dma_base && lio->glists_dma_base[i]) {
+ lio_dma_free(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ lio->glists_virt_base[i],
+ lio->glists_dma_base[i]);
+ }
+ }
+
+ kfree(lio->glists_virt_base);
+ lio->glists_virt_base = NULL;
+
+ kfree(lio->glists_dma_base);
+ lio->glists_dma_base = NULL;
+
+ kfree(lio->glist);
+ lio->glist = NULL;
+}
+
+/**
+ * \brief Setup gather lists
+ * @param lio per-network private data
+ */
+int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
+{
+ struct octnic_gather *g;
+ int i, j;
+
+ lio->glist_lock =
+ kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL);
+ if (!lio->glist_lock)
+ return -ENOMEM;
+
+ lio->glist =
+ kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL);
+ if (!lio->glist) {
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+ return -ENOMEM;
+ }
+
+ lio->glist_entry_size =
+ ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+ /* allocate memory to store virtual and dma base address of
+ * per glist consistent memory
+ */
+ lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+ GFP_KERNEL);
+ lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+ GFP_KERNEL);
+
+ if (!lio->glists_virt_base || !lio->glists_dma_base) {
+ lio_delete_glists(lio);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < num_iqs; i++) {
+ int numa_node = dev_to_node(&oct->pci_dev->dev);
+
+ spin_lock_init(&lio->glist_lock[i]);
+
+ INIT_LIST_HEAD(&lio->glist[i]);
+
+ lio->glists_virt_base[i] =
+ lio_dma_alloc(oct,
+ lio->glist_entry_size * lio->tx_qsize,
+ &lio->glists_dma_base[i]);
+
+ if (!lio->glists_virt_base[i]) {
+ lio_delete_glists(lio);
+ return -ENOMEM;
+ }
+
+ for (j = 0; j < lio->tx_qsize; j++) {
+ g = kzalloc_node(sizeof(*g), GFP_KERNEL,
+ numa_node);
+ if (!g)
+ g = kzalloc(sizeof(*g), GFP_KERNEL);
+ if (!g)
+ break;
+
+ g->sg = lio->glists_virt_base[i] +
+ (j * lio->glist_entry_size);
+
+ g->sg_dma_ptr = lio->glists_dma_base[i] +
+ (j * lio->glist_entry_size);
+
+ list_add_tail(&g->list, &lio->glist[i]);
+ }
+
+ if (j != lio->tx_qsize) {
+ lio_delete_glists(lio);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
{
struct lio *lio = GET_LIO(netdev);
@@ -880,8 +1036,8 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
int num_ioq_vectors;
int irqret, err;
- oct->num_msix_irqs = num_ioqs;
if (oct->msix_on) {
+ oct->num_msix_irqs = num_ioqs;
if (OCTEON_CN23XX_PF(oct)) {
num_interrupts = MAX_IOQ_INTERRUPTS_PER_PF + 1;
@@ -1169,3 +1325,159 @@ int lio_wait_for_clean_oq(struct octeon_device *oct)
return pending_pkts;
}
+
+static void
+octnet_nic_stats_callback(struct octeon_device *oct_dev,
+ u32 status, void *ptr)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+ struct oct_nic_stats_resp *resp =
+ (struct oct_nic_stats_resp *)sc->virtrptr;
+ struct oct_nic_stats_ctrl *ctrl =
+ (struct oct_nic_stats_ctrl *)sc->ctxptr;
+ struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
+ struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
+ struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire;
+ struct nic_tx_stats *tstats = &oct_dev->link_stats.fromhost;
+
+ if (status != OCTEON_REQUEST_TIMEOUT && !resp->status) {
+ octeon_swap_8B_data((u64 *)&resp->stats,
+ (sizeof(struct oct_link_stats)) >> 3);
+
+ /* RX link-level stats */
+ rstats->total_rcvd = rsp_rstats->total_rcvd;
+ rstats->bytes_rcvd = rsp_rstats->bytes_rcvd;
+ rstats->total_bcst = rsp_rstats->total_bcst;
+ rstats->total_mcst = rsp_rstats->total_mcst;
+ rstats->runts = rsp_rstats->runts;
+ rstats->ctl_rcvd = rsp_rstats->ctl_rcvd;
+ /* Accounts for over/under-run of buffers */
+ rstats->fifo_err = rsp_rstats->fifo_err;
+ rstats->dmac_drop = rsp_rstats->dmac_drop;
+ rstats->fcs_err = rsp_rstats->fcs_err;
+ rstats->jabber_err = rsp_rstats->jabber_err;
+ rstats->l2_err = rsp_rstats->l2_err;
+ rstats->frame_err = rsp_rstats->frame_err;
+ rstats->red_drops = rsp_rstats->red_drops;
+
+ /* RX firmware stats */
+ rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd;
+ rstats->fw_total_fwd = rsp_rstats->fw_total_fwd;
+ rstats->fw_total_mcast = rsp_rstats->fw_total_mcast;
+ rstats->fw_total_bcast = rsp_rstats->fw_total_bcast;
+ rstats->fw_err_pko = rsp_rstats->fw_err_pko;
+ rstats->fw_err_link = rsp_rstats->fw_err_link;
+ rstats->fw_err_drop = rsp_rstats->fw_err_drop;
+ rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan;
+ rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err;
+
+ /* Number of packets that are LROed */
+ rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts;
+ /* Number of octets that are LROed */
+ rstats->fw_lro_octs = rsp_rstats->fw_lro_octs;
+ /* Number of LRO packets formed */
+ rstats->fw_total_lro = rsp_rstats->fw_total_lro;
+ /* Number of times lRO of packet aborted */
+ rstats->fw_lro_aborts = rsp_rstats->fw_lro_aborts;
+ rstats->fw_lro_aborts_port = rsp_rstats->fw_lro_aborts_port;
+ rstats->fw_lro_aborts_seq = rsp_rstats->fw_lro_aborts_seq;
+ rstats->fw_lro_aborts_tsval = rsp_rstats->fw_lro_aborts_tsval;
+ rstats->fw_lro_aborts_timer = rsp_rstats->fw_lro_aborts_timer;
+ /* intrmod: packet forward rate */
+ rstats->fwd_rate = rsp_rstats->fwd_rate;
+
+ /* TX link-level stats */
+ tstats->total_pkts_sent = rsp_tstats->total_pkts_sent;
+ tstats->total_bytes_sent = rsp_tstats->total_bytes_sent;
+ tstats->mcast_pkts_sent = rsp_tstats->mcast_pkts_sent;
+ tstats->bcast_pkts_sent = rsp_tstats->bcast_pkts_sent;
+ tstats->ctl_sent = rsp_tstats->ctl_sent;
+ /* Packets sent after one collision*/
+ tstats->one_collision_sent = rsp_tstats->one_collision_sent;
+ /* Packets sent after multiple collision*/
+ tstats->multi_collision_sent = rsp_tstats->multi_collision_sent;
+ /* Packets not sent due to max collisions */
+ tstats->max_collision_fail = rsp_tstats->max_collision_fail;
+ /* Packets not sent due to max deferrals */
+ tstats->max_deferral_fail = rsp_tstats->max_deferral_fail;
+ /* Accounts for over/under-run of buffers */
+ tstats->fifo_err = rsp_tstats->fifo_err;
+ tstats->runts = rsp_tstats->runts;
+ /* Total number of collisions detected */
+ tstats->total_collisions = rsp_tstats->total_collisions;
+
+ /* firmware stats */
+ tstats->fw_total_sent = rsp_tstats->fw_total_sent;
+ tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
+ tstats->fw_total_mcast_sent = rsp_tstats->fw_total_mcast_sent;
+ tstats->fw_total_bcast_sent = rsp_tstats->fw_total_bcast_sent;
+ tstats->fw_err_pko = rsp_tstats->fw_err_pko;
+ tstats->fw_err_pki = rsp_tstats->fw_err_pki;
+ tstats->fw_err_link = rsp_tstats->fw_err_link;
+ tstats->fw_err_drop = rsp_tstats->fw_err_drop;
+ tstats->fw_tso = rsp_tstats->fw_tso;
+ tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd;
+ tstats->fw_err_tso = rsp_tstats->fw_err_tso;
+ tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan;
+
+ resp->status = 1;
+ } else {
+ resp->status = -1;
+ }
+ complete(&ctrl->complete);
+}
+
+int octnet_get_link_stats(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct_dev = lio->oct_dev;
+ struct octeon_soft_command *sc;
+ struct oct_nic_stats_ctrl *ctrl;
+ struct oct_nic_stats_resp *resp;
+ int retval;
+
+ /* Alloc soft command */
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(oct_dev,
+ 0,
+ sizeof(struct oct_nic_stats_resp),
+ sizeof(struct octnic_ctrl_pkt));
+
+ if (!sc)
+ return -ENOMEM;
+
+ resp = (struct oct_nic_stats_resp *)sc->virtrptr;
+ memset(resp, 0, sizeof(struct oct_nic_stats_resp));
+
+ ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr;
+ memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl));
+ ctrl->netdev = netdev;
+ init_completion(&ctrl->complete);
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
+ OPCODE_NIC_PORT_STATS, 0, 0, 0);
+
+ sc->callback = octnet_nic_stats_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = 500; /*in milli seconds*/
+
+ retval = octeon_send_soft_command(oct_dev, sc);
+ if (retval == IQ_SEND_FAILED) {
+ octeon_free_soft_command(oct_dev, sc);
+ return -EINVAL;
+ }
+
+ wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000));
+
+ if (resp->status != 1) {
+ octeon_free_soft_command(oct_dev, sc);
+
+ return -EINVAL;
+ }
+
+ octeon_free_soft_command(oct_dev, sc);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 550ac29682a5..a1d84304a608 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -32,7 +32,6 @@
#include "cn23xx_vf_device.h"
static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs);
-static int octnet_get_link_stats(struct net_device *netdev);
struct oct_intrmod_context {
int octeon_id;
@@ -96,11 +95,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
"tx_packets",
"rx_bytes",
"tx_bytes",
- "rx_errors", /*jabber_err+l2_err+frame_err */
- "tx_errors", /*fw_err_pko+fw_err_link+fw_err_drop */
- "rx_dropped", /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd +
- *st->fromwire.dmac_drop + st->fromwire.fw_err_drop
- */
+ "rx_errors",
+ "tx_errors",
+ "rx_dropped",
"tx_dropped",
"tx_total_sent",
@@ -115,14 +112,17 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
"tx_tso_err",
"tx_vxlan",
+ "tx_mcast",
+ "tx_bcast",
+
"mac_tx_total_pkts",
"mac_tx_total_bytes",
"mac_tx_mcast_pkts",
"mac_tx_bcast_pkts",
- "mac_tx_ctl_packets", /*oct->link_stats.fromhost.ctl_sent */
+ "mac_tx_ctl_packets",
"mac_tx_total_collisions",
"mac_tx_one_collision",
- "mac_tx_multi_collison",
+ "mac_tx_multi_collision",
"mac_tx_max_collision_fail",
"mac_tx_max_deferal_fail",
"mac_tx_fifo_err",
@@ -130,6 +130,8 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
"rx_total_rcvd",
"rx_total_fwd",
+ "rx_mcast",
+ "rx_bcast",
"rx_jabber_err",
"rx_l2_err",
"rx_frame_err",
@@ -170,17 +172,21 @@ static const char oct_vf_stats_strings[][ETH_GSTRING_LEN] = {
"tx_packets",
"rx_bytes",
"tx_bytes",
- "rx_errors", /* jabber_err + l2_err+frame_err */
- "tx_errors", /* fw_err_pko + fw_err_link+fw_err_drop */
- "rx_dropped", /* total_rcvd - fw_total_rcvd + dmac_drop + fw_err_drop */
+ "rx_errors",
+ "tx_errors",
+ "rx_dropped",
"tx_dropped",
+ "rx_mcast",
+ "tx_mcast",
+ "rx_bcast",
+ "tx_bcast",
"link_state_changes",
};
/* statistics of host tx queue */
static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = {
- "packets", /*oct->instr_queue[iq_no]->stats.tx_done*/
- "bytes", /*oct->instr_queue[iq_no]->stats.tx_tot_bytes*/
+ "packets",
+ "bytes",
"dropped",
"iq_busy",
"sgentry_sent",
@@ -197,13 +203,9 @@ static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = {
/* statistics of host rx queue */
static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = {
- "packets", /*oct->droq[oq_no]->stats.rx_pkts_received */
- "bytes", /*oct->droq[oq_no]->stats.rx_bytes_received */
- "dropped", /*oct->droq[oq_no]->stats.rx_dropped+
- *oct->droq[oq_no]->stats.dropped_nodispatch+
- *oct->droq[oq_no]->stats.dropped_toomany+
- *oct->droq[oq_no]->stats.dropped_nomem
- */
+ "packets",
+ "bytes",
+ "dropped",
"dropped_nomem",
"dropped_toomany",
"fw_dropped",
@@ -359,7 +361,14 @@ lio_ethtool_get_channels(struct net_device *dev,
rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
} else if (OCTEON_CN23XX_PF(oct)) {
- max_combined = lio->linfo.num_txpciq;
+ if (oct->sriov_info.sriov_enabled) {
+ max_combined = lio->linfo.num_txpciq;
+ } else {
+ struct octeon_config *conf23_pf =
+ CHIP_CONF(oct, cn23xx_pf);
+
+ max_combined = CFG_GET_IQ_MAX_Q(conf23_pf);
+ }
combined_count = oct->num_iqs;
} else if (OCTEON_CN23XX_VF(oct)) {
u64 reg_val = 0ULL;
@@ -423,9 +432,15 @@ lio_irq_reallocate_irqs(struct octeon_device *oct, uint32_t num_ioqs)
kfree(oct->irq_name_storage);
oct->irq_name_storage = NULL;
+
+ if (octeon_allocate_ioq_vector(oct, num_ioqs)) {
+ dev_err(&oct->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
+ return -1;
+ }
+
if (octeon_setup_interrupt(oct, num_ioqs)) {
dev_info(&oct->pci_dev->dev, "Setup interrupt failed\n");
- return 1;
+ return -1;
}
/* Enable Octeon device interrupts */
@@ -455,7 +470,16 @@ lio_ethtool_set_channels(struct net_device *dev,
combined_count = channel->combined_count;
if (OCTEON_CN23XX_PF(oct)) {
- max_combined = channel->max_combined;
+ if (oct->sriov_info.sriov_enabled) {
+ max_combined = lio->linfo.num_txpciq;
+ } else {
+ struct octeon_config *conf23_pf =
+ CHIP_CONF(oct,
+ cn23xx_pf);
+
+ max_combined =
+ CFG_GET_IQ_MAX_Q(conf23_pf);
+ }
} else if (OCTEON_CN23XX_VF(oct)) {
u64 reg_val = 0ULL;
u64 ctrl = CN23XX_VF_SLI_IQ_PKT_CONTROL64(0);
@@ -483,7 +507,6 @@ lio_ethtool_set_channels(struct net_device *dev,
if (lio_reset_queues(dev, combined_count))
return -EINVAL;
- lio_irq_reallocate_irqs(oct, combined_count);
if (stopped)
dev->netdev_ops->ndo_open(dev);
@@ -822,12 +845,120 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
ering->rx_jumbo_max_pending = 0;
}
+static int lio_23xx_reconfigure_queue_count(struct lio *lio)
+{
+ struct octeon_device *oct = lio->oct_dev;
+ struct liquidio_if_cfg_context *ctx;
+ u32 resp_size, ctx_size, data_size;
+ struct liquidio_if_cfg_resp *resp;
+ struct octeon_soft_command *sc;
+ union oct_nic_if_cfg if_cfg;
+ struct lio_version *vdata;
+ u32 ifidx_or_pfnum;
+ int retval;
+ int j;
+
+ resp_size = sizeof(struct liquidio_if_cfg_resp);
+ ctx_size = sizeof(struct liquidio_if_cfg_context);
+ data_size = sizeof(struct lio_version);
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(oct, data_size,
+ resp_size, ctx_size);
+ if (!sc) {
+ dev_err(&oct->pci_dev->dev, "%s: Failed to allocate soft command\n",
+ __func__);
+ return -1;
+ }
+
+ resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+ vdata = (struct lio_version *)sc->virtdptr;
+
+ vdata->major = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+ vdata->minor = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+ vdata->micro = (__force u16)cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
+
+ ifidx_or_pfnum = oct->pf_num;
+ WRITE_ONCE(ctx->cond, 0);
+ ctx->octeon_id = lio_get_device_id(oct);
+ init_waitqueue_head(&ctx->wc);
+
+ if_cfg.u64 = 0;
+ if_cfg.s.num_iqueues = oct->sriov_info.num_pf_rings;
+ if_cfg.s.num_oqueues = oct->sriov_info.num_pf_rings;
+ if_cfg.s.base_queue = oct->sriov_info.pf_srn;
+ if_cfg.s.gmx_port_id = oct->pf_num;
+
+ sc->iq_no = 0;
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_QCOUNT_UPDATE, 0,
+ if_cfg.u64, 0);
+ sc->callback = lio_if_cfg_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = LIO_IFCFG_WAIT_TIME;
+
+ retval = octeon_send_soft_command(oct, sc);
+ if (retval == IQ_SEND_FAILED) {
+ dev_err(&oct->pci_dev->dev,
+ "iq/oq config failed status: %x\n",
+ retval);
+ goto qcount_update_fail;
+ }
+
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+ dev_err(&oct->pci_dev->dev, "Wait interrupted\n");
+ return -1;
+ }
+
+ retval = resp->status;
+ if (retval) {
+ dev_err(&oct->pci_dev->dev, "iq/oq config failed\n");
+ goto qcount_update_fail;
+ }
+
+ octeon_swap_8B_data((u64 *)(&resp->cfg_info),
+ (sizeof(struct liquidio_if_cfg_info)) >> 3);
+
+ lio->ifidx = ifidx_or_pfnum;
+ lio->linfo.num_rxpciq = hweight64(resp->cfg_info.iqmask);
+ lio->linfo.num_txpciq = hweight64(resp->cfg_info.iqmask);
+ for (j = 0; j < lio->linfo.num_rxpciq; j++) {
+ lio->linfo.rxpciq[j].u64 =
+ resp->cfg_info.linfo.rxpciq[j].u64;
+ }
+
+ for (j = 0; j < lio->linfo.num_txpciq; j++) {
+ lio->linfo.txpciq[j].u64 =
+ resp->cfg_info.linfo.txpciq[j].u64;
+ }
+
+ lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
+ lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
+ lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
+ lio->txq = lio->linfo.txpciq[0].s.q_no;
+ lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+
+ octeon_free_soft_command(oct, sc);
+ dev_info(&oct->pci_dev->dev, "Queue count updated to %d\n",
+ lio->linfo.num_rxpciq);
+
+ return 0;
+
+qcount_update_fail:
+ octeon_free_soft_command(oct, sc);
+
+ return -1;
+}
+
static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
+ int i, queue_count_update = 0;
struct napi_struct *napi, *n;
- int i, update = 0;
+ int ret;
+
+ schedule_timeout_uninterruptible(msecs_to_jiffies(100));
if (wait_for_pending_requests(oct))
dev_err(&oct->pci_dev->dev, "There were pending requests\n");
@@ -836,7 +967,7 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
if (octeon_set_io_queues_off(oct)) {
- dev_err(&oct->pci_dev->dev, "setting io queues off failed\n");
+ dev_err(&oct->pci_dev->dev, "Setting io queues off failed\n");
return -1;
}
@@ -849,9 +980,40 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
netif_napi_del(napi);
if (num_qs != oct->num_iqs) {
- netif_set_real_num_rx_queues(netdev, num_qs);
- netif_set_real_num_tx_queues(netdev, num_qs);
- update = 1;
+ ret = netif_set_real_num_rx_queues(netdev, num_qs);
+ if (ret) {
+ dev_err(&oct->pci_dev->dev,
+ "Setting real number rx failed\n");
+ return ret;
+ }
+
+ ret = netif_set_real_num_tx_queues(netdev, num_qs);
+ if (ret) {
+ dev_err(&oct->pci_dev->dev,
+ "Setting real number tx failed\n");
+ return ret;
+ }
+
+ /* The value of queue_count_update decides whether it is the
+ * queue count or the descriptor count that is being
+ * re-configured.
+ */
+ queue_count_update = 1;
+ }
+
+ /* Re-configuration of queues can happen in two scenarios, SRIOV enabled
+ * and SRIOV disabled. Few things like recreating queue zero, resetting
+ * glists and IRQs are required for both. For the latter, some more
+ * steps like updating sriov_info for the octeon device need to be done.
+ */
+ if (queue_count_update) {
+ lio_delete_glists(lio);
+
+ /* Delete mbox for PF which is SRIOV disabled because sriov_info
+ * will be now changed.
+ */
+ if ((OCTEON_CN23XX_PF(oct)) && !oct->sriov_info.sriov_enabled)
+ oct->fn_list.free_mbox(oct);
}
for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
@@ -866,24 +1028,91 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
octeon_delete_instr_queue(oct, i);
}
+ if (queue_count_update) {
+ /* For PF re-configure sriov related information */
+ if ((OCTEON_CN23XX_PF(oct)) &&
+ !oct->sriov_info.sriov_enabled) {
+ oct->sriov_info.num_pf_rings = num_qs;
+ if (cn23xx_sriov_config(oct)) {
+ dev_err(&oct->pci_dev->dev,
+ "Queue reset aborted: SRIOV config failed\n");
+ return -1;
+ }
+
+ num_qs = oct->sriov_info.num_pf_rings;
+ }
+ }
+
if (oct->fn_list.setup_device_regs(oct)) {
dev_err(&oct->pci_dev->dev, "Failed to configure device registers\n");
return -1;
}
- if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
- dev_err(&oct->pci_dev->dev, "IO queues initialization failed\n");
- return -1;
+ /* The following are needed in case of queue count re-configuration and
+ * not for descriptor count re-configuration.
+ */
+ if (queue_count_update) {
+ if (octeon_setup_instr_queues(oct))
+ return -1;
+
+ if (octeon_setup_output_queues(oct))
+ return -1;
+
+ /* Recreating mbox for PF that is SRIOV disabled */
+ if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) {
+ if (oct->fn_list.setup_mbox(oct)) {
+ dev_err(&oct->pci_dev->dev, "Mailbox setup failed\n");
+ return -1;
+ }
+ }
+
+ /* Deleting and recreating IRQs whether the interface is SRIOV
+ * enabled or disabled.
+ */
+ if (lio_irq_reallocate_irqs(oct, num_qs)) {
+ dev_err(&oct->pci_dev->dev, "IRQs could not be allocated\n");
+ return -1;
+ }
+
+ /* Enable the input and output queues for this Octeon device */
+ if (oct->fn_list.enable_io_queues(oct)) {
+ dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues\n");
+ return -1;
+ }
+
+ for (i = 0; i < oct->num_oqs; i++)
+ writel(oct->droq[i]->max_count,
+ oct->droq[i]->pkts_credit_reg);
+
+ /* Informing firmware about the new queue count. It is required
+ * for firmware to allocate more number of queues than those at
+ * load time.
+ */
+ if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) {
+ if (lio_23xx_reconfigure_queue_count(lio))
+ return -1;
+ }
}
- /* Enable the input and output queues for this Octeon device */
- if (oct->fn_list.enable_io_queues(oct)) {
- dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues");
+ /* Once firmware is aware of the new value, queues can be recreated */
+ if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
+ dev_err(&oct->pci_dev->dev, "I/O queues creation failed\n");
return -1;
}
- if (update && lio_send_queue_count_update(netdev, num_qs))
- return -1;
+ if (queue_count_update) {
+ if (lio_setup_glists(oct, lio, num_qs)) {
+ dev_err(&oct->pci_dev->dev, "Gather list allocation failed\n");
+ return -1;
+ }
+
+ /* Send firmware the information about new number of queues
+ * if the interface is a VF or a PF that is SRIOV enabled.
+ */
+ if (oct->sriov_info.sriov_enabled || OCTEON_CN23XX_VF(oct))
+ if (lio_send_queue_count_update(netdev, num_qs))
+ return -1;
+ }
return 0;
}
@@ -928,7 +1157,7 @@ static int lio_ethtool_set_ringparam(struct net_device *netdev,
CFG_SET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
rx_count);
- if (lio_reset_queues(netdev, lio->linfo.num_txpciq))
+ if (lio_reset_queues(netdev, oct->num_iqs))
goto err_lio_reset_queues;
if (stopped)
@@ -1063,33 +1292,48 @@ lio_get_ethtool_stats(struct net_device *netdev,
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct_dev = lio->oct_dev;
- struct net_device_stats *netstats = &netdev->stats;
+ struct rtnl_link_stats64 lstats;
int i = 0, j;
if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
return;
- netdev->netdev_ops->ndo_get_stats(netdev);
- octnet_get_link_stats(netdev);
-
+ netdev->netdev_ops->ndo_get_stats64(netdev, &lstats);
/*sum of oct->droq[oq_no]->stats->rx_pkts_received */
- data[i++] = CVM_CAST64(netstats->rx_packets);
+ data[i++] = lstats.rx_packets;
/*sum of oct->instr_queue[iq_no]->stats.tx_done */
- data[i++] = CVM_CAST64(netstats->tx_packets);
+ data[i++] = lstats.tx_packets;
/*sum of oct->droq[oq_no]->stats->rx_bytes_received */
- data[i++] = CVM_CAST64(netstats->rx_bytes);
+ data[i++] = lstats.rx_bytes;
/*sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */
- data[i++] = CVM_CAST64(netstats->tx_bytes);
- data[i++] = CVM_CAST64(netstats->rx_errors);
- data[i++] = CVM_CAST64(netstats->tx_errors);
+ data[i++] = lstats.tx_bytes;
+ data[i++] = lstats.rx_errors +
+ oct_dev->link_stats.fromwire.fcs_err +
+ oct_dev->link_stats.fromwire.jabber_err +
+ oct_dev->link_stats.fromwire.l2_err +
+ oct_dev->link_stats.fromwire.frame_err;
+ data[i++] = lstats.tx_errors;
/*sum of oct->droq[oq_no]->stats->rx_dropped +
*oct->droq[oq_no]->stats->dropped_nodispatch +
*oct->droq[oq_no]->stats->dropped_toomany +
*oct->droq[oq_no]->stats->dropped_nomem
*/
- data[i++] = CVM_CAST64(netstats->rx_dropped);
+ data[i++] = lstats.rx_dropped +
+ oct_dev->link_stats.fromwire.fifo_err +
+ oct_dev->link_stats.fromwire.dmac_drop +
+ oct_dev->link_stats.fromwire.red_drops +
+ oct_dev->link_stats.fromwire.fw_err_pko +
+ oct_dev->link_stats.fromwire.fw_err_link +
+ oct_dev->link_stats.fromwire.fw_err_drop;
/*sum of oct->instr_queue[iq_no]->stats.tx_dropped */
- data[i++] = CVM_CAST64(netstats->tx_dropped);
+ data[i++] = lstats.tx_dropped +
+ oct_dev->link_stats.fromhost.max_collision_fail +
+ oct_dev->link_stats.fromhost.max_deferral_fail +
+ oct_dev->link_stats.fromhost.total_collisions +
+ oct_dev->link_stats.fromhost.fw_err_pko +
+ oct_dev->link_stats.fromhost.fw_err_link +
+ oct_dev->link_stats.fromhost.fw_err_drop +
+ oct_dev->link_stats.fromhost.fw_err_pki;
/* firmware tx stats */
/*per_core_stats[cvmx_get_core_num()].link_stats[mdata->from_ifidx].
@@ -1124,6 +1368,10 @@ lio_get_ethtool_stats(struct net_device *netdev,
*/
data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_tx_vxlan);
+ /* Multicast packets sent by this port */
+ data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent;
+ data[i++] = oct_dev->link_stats.fromhost.fw_total_bcast_sent;
+
/* mac tx statistics */
/*CVMX_BGXX_CMRX_TX_STAT5 */
data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.total_pkts_sent);
@@ -1160,6 +1408,9 @@ lio_get_ethtool_stats(struct net_device *netdev,
*fw_total_fwd
*/
data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_total_fwd);
+ /* Multicast packets received on this port */
+ data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast;
+ data[i++] = oct_dev->link_stats.fromwire.fw_total_bcast;
/*per_core_stats[core_id].link_stats[ifidx].fromwire.jabber_err */
data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.jabber_err);
/*per_core_stats[core_id].link_stats[ifidx].fromwire.l2_err */
@@ -1328,7 +1579,7 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
__attribute__((unused)),
u64 *data)
{
- struct net_device_stats *netstats = &netdev->stats;
+ struct rtnl_link_stats64 lstats;
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct_dev = lio->oct_dev;
int i = 0, j, vj;
@@ -1336,25 +1587,31 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
return;
- netdev->netdev_ops->ndo_get_stats(netdev);
+ netdev->netdev_ops->ndo_get_stats64(netdev, &lstats);
/* sum of oct->droq[oq_no]->stats->rx_pkts_received */
- data[i++] = CVM_CAST64(netstats->rx_packets);
+ data[i++] = lstats.rx_packets;
/* sum of oct->instr_queue[iq_no]->stats.tx_done */
- data[i++] = CVM_CAST64(netstats->tx_packets);
+ data[i++] = lstats.tx_packets;
/* sum of oct->droq[oq_no]->stats->rx_bytes_received */
- data[i++] = CVM_CAST64(netstats->rx_bytes);
+ data[i++] = lstats.rx_bytes;
/* sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */
- data[i++] = CVM_CAST64(netstats->tx_bytes);
- data[i++] = CVM_CAST64(netstats->rx_errors);
- data[i++] = CVM_CAST64(netstats->tx_errors);
+ data[i++] = lstats.tx_bytes;
+ data[i++] = lstats.rx_errors;
+ data[i++] = lstats.tx_errors;
/* sum of oct->droq[oq_no]->stats->rx_dropped +
* oct->droq[oq_no]->stats->dropped_nodispatch +
* oct->droq[oq_no]->stats->dropped_toomany +
* oct->droq[oq_no]->stats->dropped_nomem
*/
- data[i++] = CVM_CAST64(netstats->rx_dropped);
+ data[i++] = lstats.rx_dropped;
/* sum of oct->instr_queue[iq_no]->stats.tx_dropped */
- data[i++] = CVM_CAST64(netstats->tx_dropped);
+ data[i++] = lstats.tx_dropped;
+
+ data[i++] = oct_dev->link_stats.fromwire.fw_total_mcast;
+ data[i++] = oct_dev->link_stats.fromhost.fw_total_mcast_sent;
+ data[i++] = oct_dev->link_stats.fromwire.fw_total_bcast;
+ data[i++] = oct_dev->link_stats.fromhost.fw_total_bcast_sent;
+
/* lio->link_changes */
data[i++] = CVM_CAST64(lio->link_changes);
@@ -1765,161 +2022,6 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
return -EINTR;
}
-static void
-octnet_nic_stats_callback(struct octeon_device *oct_dev,
- u32 status, void *ptr)
-{
- struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
- struct oct_nic_stats_resp *resp =
- (struct oct_nic_stats_resp *)sc->virtrptr;
- struct oct_nic_stats_ctrl *ctrl =
- (struct oct_nic_stats_ctrl *)sc->ctxptr;
- struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
- struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
-
- struct nic_rx_stats *rstats = &oct_dev->link_stats.fromwire;
- struct nic_tx_stats *tstats = &oct_dev->link_stats.fromhost;
-
- if ((status != OCTEON_REQUEST_TIMEOUT) && !resp->status) {
- octeon_swap_8B_data((u64 *)&resp->stats,
- (sizeof(struct oct_link_stats)) >> 3);
-
- /* RX link-level stats */
- rstats->total_rcvd = rsp_rstats->total_rcvd;
- rstats->bytes_rcvd = rsp_rstats->bytes_rcvd;
- rstats->total_bcst = rsp_rstats->total_bcst;
- rstats->total_mcst = rsp_rstats->total_mcst;
- rstats->runts = rsp_rstats->runts;
- rstats->ctl_rcvd = rsp_rstats->ctl_rcvd;
- /* Accounts for over/under-run of buffers */
- rstats->fifo_err = rsp_rstats->fifo_err;
- rstats->dmac_drop = rsp_rstats->dmac_drop;
- rstats->fcs_err = rsp_rstats->fcs_err;
- rstats->jabber_err = rsp_rstats->jabber_err;
- rstats->l2_err = rsp_rstats->l2_err;
- rstats->frame_err = rsp_rstats->frame_err;
-
- /* RX firmware stats */
- rstats->fw_total_rcvd = rsp_rstats->fw_total_rcvd;
- rstats->fw_total_fwd = rsp_rstats->fw_total_fwd;
- rstats->fw_err_pko = rsp_rstats->fw_err_pko;
- rstats->fw_err_link = rsp_rstats->fw_err_link;
- rstats->fw_err_drop = rsp_rstats->fw_err_drop;
- rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan;
- rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err;
-
- /* Number of packets that are LROed */
- rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts;
- /* Number of octets that are LROed */
- rstats->fw_lro_octs = rsp_rstats->fw_lro_octs;
- /* Number of LRO packets formed */
- rstats->fw_total_lro = rsp_rstats->fw_total_lro;
- /* Number of times lRO of packet aborted */
- rstats->fw_lro_aborts = rsp_rstats->fw_lro_aborts;
- rstats->fw_lro_aborts_port = rsp_rstats->fw_lro_aborts_port;
- rstats->fw_lro_aborts_seq = rsp_rstats->fw_lro_aborts_seq;
- rstats->fw_lro_aborts_tsval = rsp_rstats->fw_lro_aborts_tsval;
- rstats->fw_lro_aborts_timer = rsp_rstats->fw_lro_aborts_timer;
- /* intrmod: packet forward rate */
- rstats->fwd_rate = rsp_rstats->fwd_rate;
-
- /* TX link-level stats */
- tstats->total_pkts_sent = rsp_tstats->total_pkts_sent;
- tstats->total_bytes_sent = rsp_tstats->total_bytes_sent;
- tstats->mcast_pkts_sent = rsp_tstats->mcast_pkts_sent;
- tstats->bcast_pkts_sent = rsp_tstats->bcast_pkts_sent;
- tstats->ctl_sent = rsp_tstats->ctl_sent;
- /* Packets sent after one collision*/
- tstats->one_collision_sent = rsp_tstats->one_collision_sent;
- /* Packets sent after multiple collision*/
- tstats->multi_collision_sent = rsp_tstats->multi_collision_sent;
- /* Packets not sent due to max collisions */
- tstats->max_collision_fail = rsp_tstats->max_collision_fail;
- /* Packets not sent due to max deferrals */
- tstats->max_deferral_fail = rsp_tstats->max_deferral_fail;
- /* Accounts for over/under-run of buffers */
- tstats->fifo_err = rsp_tstats->fifo_err;
- tstats->runts = rsp_tstats->runts;
- /* Total number of collisions detected */
- tstats->total_collisions = rsp_tstats->total_collisions;
-
- /* firmware stats */
- tstats->fw_total_sent = rsp_tstats->fw_total_sent;
- tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
- tstats->fw_err_pko = rsp_tstats->fw_err_pko;
- tstats->fw_err_pki = rsp_tstats->fw_err_pki;
- tstats->fw_err_link = rsp_tstats->fw_err_link;
- tstats->fw_err_drop = rsp_tstats->fw_err_drop;
- tstats->fw_tso = rsp_tstats->fw_tso;
- tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd;
- tstats->fw_err_tso = rsp_tstats->fw_err_tso;
- tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan;
-
- resp->status = 1;
- } else {
- resp->status = -1;
- }
- complete(&ctrl->complete);
-}
-
-/* Configure interrupt moderation parameters */
-static int octnet_get_link_stats(struct net_device *netdev)
-{
- struct lio *lio = GET_LIO(netdev);
- struct octeon_device *oct_dev = lio->oct_dev;
-
- struct octeon_soft_command *sc;
- struct oct_nic_stats_ctrl *ctrl;
- struct oct_nic_stats_resp *resp;
-
- int retval;
-
- /* Alloc soft command */
- sc = (struct octeon_soft_command *)
- octeon_alloc_soft_command(oct_dev,
- 0,
- sizeof(struct oct_nic_stats_resp),
- sizeof(struct octnic_ctrl_pkt));
-
- if (!sc)
- return -ENOMEM;
-
- resp = (struct oct_nic_stats_resp *)sc->virtrptr;
- memset(resp, 0, sizeof(struct oct_nic_stats_resp));
-
- ctrl = (struct oct_nic_stats_ctrl *)sc->ctxptr;
- memset(ctrl, 0, sizeof(struct oct_nic_stats_ctrl));
- ctrl->netdev = netdev;
- init_completion(&ctrl->complete);
-
- sc->iq_no = lio->linfo.txpciq[0].s.q_no;
-
- octeon_prepare_soft_command(oct_dev, sc, OPCODE_NIC,
- OPCODE_NIC_PORT_STATS, 0, 0, 0);
-
- sc->callback = octnet_nic_stats_callback;
- sc->callback_arg = sc;
- sc->wait_time = 500; /*in milli seconds*/
-
- retval = octeon_send_soft_command(oct_dev, sc);
- if (retval == IQ_SEND_FAILED) {
- octeon_free_soft_command(oct_dev, sc);
- return -EINVAL;
- }
-
- wait_for_completion_timeout(&ctrl->complete, msecs_to_jiffies(1000));
-
- if (resp->status != 1) {
- octeon_free_soft_command(oct_dev, sc);
-
- return -EINVAL;
- }
-
- octeon_free_soft_command(oct_dev, sc);
-
- return 0;
-}
-
static int lio_get_intr_coalesce(struct net_device *netdev,
struct ethtool_coalesce *intr_coal)
{
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 603a144d3d9c..ee75048b3937 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -138,33 +138,10 @@ union tx_info {
* by this structure in the NIC module.
*/
-#define OCTNIC_MAX_SG (MAX_SKB_FRAGS)
-
#define OCTNIC_GSO_MAX_HEADER_SIZE 128
#define OCTNIC_GSO_MAX_SIZE \
(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
-/** Structure of a node in list of gather components maintained by
- * NIC driver for each network device.
- */
-struct octnic_gather {
- /** List manipulation. Next and prev pointers. */
- struct list_head list;
-
- /** Size of the gather component at sg in bytes. */
- int sg_size;
-
- /** Number of bytes that sg was adjusted to make it 8B-aligned. */
- int adjust;
-
- /** Gather component that can accommodate max sized fragment list
- * received from the IP layer.
- */
- struct octeon_sg_entry *sg;
-
- dma_addr_t sg_dma_ptr;
-};
-
struct handshake {
struct completion init;
struct completion started;
@@ -520,7 +497,7 @@ static void liquidio_deinit_pci(void)
*/
static inline int check_txq_status(struct lio *lio)
{
- int numqs = lio->netdev->num_tx_queues;
+ int numqs = lio->netdev->real_num_tx_queues;
int ret_val = 0;
int q, iq;
@@ -542,148 +519,6 @@ static inline int check_txq_status(struct lio *lio)
}
/**
- * Remove the node at the head of the list. The list would be empty at
- * the end of this call if there are no more nodes in the list.
- */
-static inline struct list_head *list_delete_head(struct list_head *root)
-{
- struct list_head *node;
-
- if ((root->prev == root) && (root->next == root))
- node = NULL;
- else
- node = root->next;
-
- if (node)
- list_del(node);
-
- return node;
-}
-
-/**
- * \brief Delete gather lists
- * @param lio per-network private data
- */
-static void delete_glists(struct lio *lio)
-{
- struct octnic_gather *g;
- int i;
-
- kfree(lio->glist_lock);
- lio->glist_lock = NULL;
-
- if (!lio->glist)
- return;
-
- for (i = 0; i < lio->linfo.num_txpciq; i++) {
- do {
- g = (struct octnic_gather *)
- list_delete_head(&lio->glist[i]);
- if (g)
- kfree(g);
- } while (g);
-
- if (lio->glists_virt_base && lio->glists_virt_base[i] &&
- lio->glists_dma_base && lio->glists_dma_base[i]) {
- lio_dma_free(lio->oct_dev,
- lio->glist_entry_size * lio->tx_qsize,
- lio->glists_virt_base[i],
- lio->glists_dma_base[i]);
- }
- }
-
- kfree(lio->glists_virt_base);
- lio->glists_virt_base = NULL;
-
- kfree(lio->glists_dma_base);
- lio->glists_dma_base = NULL;
-
- kfree(lio->glist);
- lio->glist = NULL;
-}
-
-/**
- * \brief Setup gather lists
- * @param lio per-network private data
- */
-static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
-{
- int i, j;
- struct octnic_gather *g;
-
- lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
- GFP_KERNEL);
- if (!lio->glist_lock)
- return -ENOMEM;
-
- lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
- GFP_KERNEL);
- if (!lio->glist) {
- kfree(lio->glist_lock);
- lio->glist_lock = NULL;
- return -ENOMEM;
- }
-
- lio->glist_entry_size =
- ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
- /* allocate memory to store virtual and dma base address of
- * per glist consistent memory
- */
- lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
- GFP_KERNEL);
- lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
- GFP_KERNEL);
-
- if (!lio->glists_virt_base || !lio->glists_dma_base) {
- delete_glists(lio);
- return -ENOMEM;
- }
-
- for (i = 0; i < num_iqs; i++) {
- int numa_node = dev_to_node(&oct->pci_dev->dev);
-
- spin_lock_init(&lio->glist_lock[i]);
-
- INIT_LIST_HEAD(&lio->glist[i]);
-
- lio->glists_virt_base[i] =
- lio_dma_alloc(oct,
- lio->glist_entry_size * lio->tx_qsize,
- &lio->glists_dma_base[i]);
-
- if (!lio->glists_virt_base[i]) {
- delete_glists(lio);
- return -ENOMEM;
- }
-
- for (j = 0; j < lio->tx_qsize; j++) {
- g = kzalloc_node(sizeof(*g), GFP_KERNEL,
- numa_node);
- if (!g)
- g = kzalloc(sizeof(*g), GFP_KERNEL);
- if (!g)
- break;
-
- g->sg = lio->glists_virt_base[i] +
- (j * lio->glist_entry_size);
-
- g->sg_dma_ptr = lio->glists_dma_base[i] +
- (j * lio->glist_entry_size);
-
- list_add_tail(&g->list, &lio->glist[i]);
- }
-
- if (j != lio->tx_qsize) {
- delete_glists(lio);
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-/**
* \brief Print link information
* @param netdev network device
*/
@@ -1471,7 +1306,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
cleanup_rx_oom_poll_fn(netdev);
- delete_glists(lio);
+ lio_delete_glists(lio);
free_netdev(netdev);
@@ -1686,7 +1521,7 @@ static void free_netsgbuf(void *buf)
i++;
}
- iq = skb_iq(lio, skb);
+ iq = skb_iq(lio->oct_dev, skb);
spin_lock(&lio->glist_lock[iq]);
list_add_tail(&g->list, &lio->glist[iq]);
spin_unlock(&lio->glist_lock[iq]);
@@ -1729,7 +1564,7 @@ static void free_netsgbuf_with_resp(void *buf)
i++;
}
- iq = skb_iq(lio, skb);
+ iq = skb_iq(lio->oct_dev, skb);
spin_lock(&lio->glist_lock[iq]);
list_add_tail(&g->list, &lio->glist[iq]);
@@ -1942,39 +1777,6 @@ static int load_firmware(struct octeon_device *oct)
}
/**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void if_cfg_callback(struct octeon_device *oct,
- u32 status __attribute__((unused)),
- void *buf)
-{
- struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
- struct liquidio_if_cfg_resp *resp;
- struct liquidio_if_cfg_context *ctx;
-
- resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
- ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
- oct = lio_get_device(ctx->octeon_id);
- if (resp->status)
- dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: 0x%llx (0x%08x)\n",
- CVM_CAST64(resp->status), status);
- WRITE_ONCE(ctx->cond, 1);
-
- snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
- resp->cfg_info.liquidio_firmware_version);
-
- /* This barrier is required to be sure that the response has been
- * written fully before waking up the handler
- */
- wmb();
-
- wake_up_interruptible(&ctx->wc);
-}
-
-/**
* \brief Poll routine for checking transmit queue status
* @param work work_struct data structure
*/
@@ -2049,11 +1851,6 @@ static int liquidio_open(struct net_device *netdev)
ifstate_set(lio, LIO_IFSTATE_RUNNING);
- /* Ready for link status updates */
- lio->intf_open = 1;
-
- netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-
if (OCTEON_CN23XX_PF(oct)) {
if (!oct->msix_on)
if (setup_tx_poll_fn(netdev))
@@ -2063,7 +1860,12 @@ static int liquidio_open(struct net_device *netdev)
return -1;
}
- start_txqs(netdev);
+ netif_tx_start_all_queues(netdev);
+
+ /* Ready for link status updates */
+ lio->intf_open = 1;
+
+ netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
/* tell Octeon to start forwarding packets to host */
send_rx_ctrl_cmd(lio, 1);
@@ -2086,11 +1888,15 @@ static int liquidio_stop(struct net_device *netdev)
ifstate_reset(lio, LIO_IFSTATE_RUNNING);
- netif_tx_disable(netdev);
+ /* Stop any link updates */
+ lio->intf_open = 0;
+
+ stop_txqs(netdev);
/* Inform that netif carrier is down */
netif_carrier_off(netdev);
- lio->intf_open = 0;
+ netif_tx_disable(netdev);
+
lio->linfo.link.s.link_up = 0;
lio->link_changes++;
@@ -2252,14 +2058,11 @@ static int liquidio_set_mac(struct net_device *netdev, void *p)
return 0;
}
-/**
- * \brief Net device get_stats
- * @param netdev network device
- */
-static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
+static void
+liquidio_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *lstats)
{
struct lio *lio = GET_LIO(netdev);
- struct net_device_stats *stats = &netdev->stats;
struct octeon_device *oct;
u64 pkts = 0, drop = 0, bytes = 0;
struct oct_droq_stats *oq_stats;
@@ -2269,7 +2072,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
oct = lio->oct_dev;
if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
- return stats;
+ return;
for (i = 0; i < oct->num_iqs; i++) {
iq_no = lio->linfo.txpciq[i].s.q_no;
@@ -2279,9 +2082,9 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
bytes += iq_stats->tx_tot_bytes;
}
- stats->tx_packets = pkts;
- stats->tx_bytes = bytes;
- stats->tx_dropped = drop;
+ lstats->tx_packets = pkts;
+ lstats->tx_bytes = bytes;
+ lstats->tx_dropped = drop;
pkts = 0;
drop = 0;
@@ -2298,11 +2101,34 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
bytes += oq_stats->rx_bytes_received;
}
- stats->rx_bytes = bytes;
- stats->rx_packets = pkts;
- stats->rx_dropped = drop;
-
- return stats;
+ lstats->rx_bytes = bytes;
+ lstats->rx_packets = pkts;
+ lstats->rx_dropped = drop;
+
+ octnet_get_link_stats(netdev);
+ lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
+ lstats->collisions = oct->link_stats.fromhost.total_collisions;
+
+ /* detailed rx_errors: */
+ lstats->rx_length_errors = oct->link_stats.fromwire.l2_err;
+ /* recved pkt with crc error */
+ lstats->rx_crc_errors = oct->link_stats.fromwire.fcs_err;
+ /* recv'd frame alignment error */
+ lstats->rx_frame_errors = oct->link_stats.fromwire.frame_err;
+ /* recv'r fifo overrun */
+ lstats->rx_fifo_errors = oct->link_stats.fromwire.fifo_err;
+
+ lstats->rx_errors = lstats->rx_length_errors + lstats->rx_crc_errors +
+ lstats->rx_frame_errors + lstats->rx_fifo_errors;
+
+ /* detailed tx_errors */
+ lstats->tx_aborted_errors = oct->link_stats.fromhost.fw_err_pko;
+ lstats->tx_carrier_errors = oct->link_stats.fromhost.fw_err_link;
+ lstats->tx_fifo_errors = oct->link_stats.fromhost.fifo_err;
+
+ lstats->tx_errors = lstats->tx_aborted_errors +
+ lstats->tx_carrier_errors +
+ lstats->tx_fifo_errors;
}
/**
@@ -2510,7 +2336,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
lio = GET_LIO(netdev);
oct = lio->oct_dev;
- q_idx = skb_iq(lio, skb);
+ q_idx = skb_iq(oct, skb);
tag = q_idx;
iq_no = lio->linfo.txpciq[q_idx].s.q_no;
@@ -2585,6 +2411,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
__func__);
+ stats->tx_dmamap_fail++;
return NETDEV_TX_BUSY;
}
@@ -2602,7 +2429,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
spin_lock(&lio->glist_lock[q_idx]);
g = (struct octnic_gather *)
- list_delete_head(&lio->glist[q_idx]);
+ lio_list_delete_head(&lio->glist[q_idx]);
spin_unlock(&lio->glist_lock[q_idx]);
if (!g) {
@@ -2624,6 +2451,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) {
dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n",
__func__);
+ stats->tx_dmamap_fail++;
return NETDEV_TX_BUSY;
}
add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0);
@@ -3324,11 +3152,36 @@ static const struct switchdev_ops lio_pf_switchdev_ops = {
.switchdev_port_attr_get = lio_pf_switchdev_attr_get,
};
+static int liquidio_get_vf_stats(struct net_device *netdev, int vfidx,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct oct_vf_stats stats;
+ int ret;
+
+ if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+ return -EINVAL;
+
+ memset(&stats, 0, sizeof(struct oct_vf_stats));
+ ret = cn23xx_get_vf_stats(oct, vfidx, &stats);
+ if (!ret) {
+ vf_stats->rx_packets = stats.rx_packets;
+ vf_stats->tx_packets = stats.tx_packets;
+ vf_stats->rx_bytes = stats.rx_bytes;
+ vf_stats->tx_bytes = stats.tx_bytes;
+ vf_stats->broadcast = stats.broadcast;
+ vf_stats->multicast = stats.multicast;
+ }
+
+ return ret;
+}
+
static const struct net_device_ops lionetdevops = {
.ndo_open = liquidio_open,
.ndo_stop = liquidio_stop,
.ndo_start_xmit = liquidio_xmit,
- .ndo_get_stats = liquidio_get_stats,
+ .ndo_get_stats64 = liquidio_get_stats64,
.ndo_set_mac_address = liquidio_set_mac,
.ndo_set_rx_mode = liquidio_set_mcast_list,
.ndo_tx_timeout = liquidio_tx_timeout,
@@ -3346,6 +3199,7 @@ static const struct net_device_ops lionetdevops = {
.ndo_get_vf_config = liquidio_get_vf_config,
.ndo_set_vf_trust = liquidio_set_vf_trust,
.ndo_set_vf_link_state = liquidio_set_vf_link_state,
+ .ndo_get_vf_stats = liquidio_get_vf_stats,
};
/** \brief Entry point for the liquidio module
@@ -3448,6 +3302,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
struct liquidio_if_cfg_resp *resp;
struct octdev_props *props;
int retval, num_iqueues, num_oqueues;
+ int max_num_queues = 0;
union oct_nic_if_cfg if_cfg;
unsigned int base_queue;
unsigned int gmx_port_id;
@@ -3528,9 +3383,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
OPCODE_NIC_IF_CFG, 0,
if_cfg.u64, 0);
- sc->callback = if_cfg_callback;
+ sc->callback = lio_if_cfg_callback;
sc->callback_arg = sc;
- sc->wait_time = 3000;
+ sc->wait_time = LIO_IFCFG_WAIT_TIME;
retval = octeon_send_soft_command(octeon_dev, sc);
if (retval == IQ_SEND_FAILED) {
@@ -3584,11 +3439,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
resp->cfg_info.oqmask);
goto setup_nic_dev_fail;
}
+
+ if (OCTEON_CN6XXX(octeon_dev)) {
+ max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev,
+ cn6xxx));
+ } else if (OCTEON_CN23XX_PF(octeon_dev)) {
+ max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev,
+ cn23xx_pf));
+ }
+
dev_dbg(&octeon_dev->pci_dev->dev,
- "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n",
+ "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d max_num_queues: %d\n",
i, resp->cfg_info.iqmask, resp->cfg_info.oqmask,
- num_iqueues, num_oqueues);
- netdev = alloc_etherdev_mq(LIO_SIZE, num_iqueues);
+ num_iqueues, num_oqueues, max_num_queues);
+ netdev = alloc_etherdev_mq(LIO_SIZE, max_num_queues);
if (!netdev) {
dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
@@ -3603,6 +3467,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
netdev->netdev_ops = &lionetdevops;
SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops);
+ retval = netif_set_real_num_rx_queues(netdev, num_oqueues);
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "setting real number rx failed\n");
+ goto setup_nic_dev_fail;
+ }
+
+ retval = netif_set_real_num_tx_queues(netdev, num_iqueues);
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "setting real number tx failed\n");
+ goto setup_nic_dev_fail;
+ }
+
lio = GET_LIO(netdev);
memset(lio, 0, sizeof(struct lio));
@@ -3724,7 +3602,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
- if (setup_glists(octeon_dev, lio, num_iqueues)) {
+ if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
dev_err(&octeon_dev->pci_dev->dev,
"Gather list allocation failed\n");
goto setup_nic_dev_fail;
@@ -4223,7 +4101,9 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
}
atomic_set(&octeon_dev->status, OCT_DEV_MBOX_SETUP_DONE);
- if (octeon_allocate_ioq_vector(octeon_dev)) {
+ if (octeon_allocate_ioq_vector
+ (octeon_dev,
+ octeon_dev->sriov_info.num_pf_rings)) {
dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
return 1;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index f92dfa411de6..6295eeec795c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -69,30 +69,10 @@ union tx_info {
} s;
};
-#define OCTNIC_MAX_SG (MAX_SKB_FRAGS)
-
#define OCTNIC_GSO_MAX_HEADER_SIZE 128
#define OCTNIC_GSO_MAX_SIZE \
(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
-struct octnic_gather {
- /* List manipulation. Next and prev pointers. */
- struct list_head list;
-
- /* Size of the gather component at sg in bytes. */
- int sg_size;
-
- /* Number of bytes that sg was adjusted to make it 8B-aligned. */
- int adjust;
-
- /* Gather component that can accommodate max sized fragment list
- * received from the IP layer.
- */
- struct octeon_sg_entry *sg;
-
- dma_addr_t sg_dma_ptr;
-};
-
static int
liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
static void liquidio_vf_remove(struct pci_dev *pdev);
@@ -285,142 +265,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
};
/**
- * Remove the node at the head of the list. The list would be empty at
- * the end of this call if there are no more nodes in the list.
- */
-static struct list_head *list_delete_head(struct list_head *root)
-{
- struct list_head *node;
-
- if ((root->prev == root) && (root->next == root))
- node = NULL;
- else
- node = root->next;
-
- if (node)
- list_del(node);
-
- return node;
-}
-
-/**
- * \brief Delete gather lists
- * @param lio per-network private data
- */
-static void delete_glists(struct lio *lio)
-{
- struct octnic_gather *g;
- int i;
-
- kfree(lio->glist_lock);
- lio->glist_lock = NULL;
-
- if (!lio->glist)
- return;
-
- for (i = 0; i < lio->linfo.num_txpciq; i++) {
- do {
- g = (struct octnic_gather *)
- list_delete_head(&lio->glist[i]);
- kfree(g);
- } while (g);
-
- if (lio->glists_virt_base && lio->glists_virt_base[i] &&
- lio->glists_dma_base && lio->glists_dma_base[i]) {
- lio_dma_free(lio->oct_dev,
- lio->glist_entry_size * lio->tx_qsize,
- lio->glists_virt_base[i],
- lio->glists_dma_base[i]);
- }
- }
-
- kfree(lio->glists_virt_base);
- lio->glists_virt_base = NULL;
-
- kfree(lio->glists_dma_base);
- lio->glists_dma_base = NULL;
-
- kfree(lio->glist);
- lio->glist = NULL;
-}
-
-/**
- * \brief Setup gather lists
- * @param lio per-network private data
- */
-static int setup_glists(struct lio *lio, int num_iqs)
-{
- struct octnic_gather *g;
- int i, j;
-
- lio->glist_lock =
- kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
- if (!lio->glist_lock)
- return -ENOMEM;
-
- lio->glist =
- kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
- if (!lio->glist) {
- kfree(lio->glist_lock);
- lio->glist_lock = NULL;
- return -ENOMEM;
- }
-
- lio->glist_entry_size =
- ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
- /* allocate memory to store virtual and dma base address of
- * per glist consistent memory
- */
- lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
- GFP_KERNEL);
- lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
- GFP_KERNEL);
-
- if (!lio->glists_virt_base || !lio->glists_dma_base) {
- delete_glists(lio);
- return -ENOMEM;
- }
-
- for (i = 0; i < num_iqs; i++) {
- spin_lock_init(&lio->glist_lock[i]);
-
- INIT_LIST_HEAD(&lio->glist[i]);
-
- lio->glists_virt_base[i] =
- lio_dma_alloc(lio->oct_dev,
- lio->glist_entry_size * lio->tx_qsize,
- &lio->glists_dma_base[i]);
-
- if (!lio->glists_virt_base[i]) {
- delete_glists(lio);
- return -ENOMEM;
- }
-
- for (j = 0; j < lio->tx_qsize; j++) {
- g = kzalloc(sizeof(*g), GFP_KERNEL);
- if (!g)
- break;
-
- g->sg = lio->glists_virt_base[i] +
- (j * lio->glist_entry_size);
-
- g->sg_dma_ptr = lio->glists_dma_base[i] +
- (j * lio->glist_entry_size);
-
- list_add_tail(&g->list, &lio->glist[i]);
- }
-
- if (j != lio->tx_qsize) {
- delete_glists(lio);
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-/**
* \brief Print link information
* @param netdev network device
*/
@@ -856,7 +700,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
cleanup_link_status_change_wq(netdev);
- delete_glists(lio);
+ lio_delete_glists(lio);
free_netdev(netdev);
@@ -1005,7 +849,7 @@ static void free_netsgbuf(void *buf)
i++;
}
- iq = skb_iq(lio, skb);
+ iq = skb_iq(lio->oct_dev, skb);
spin_lock(&lio->glist_lock[iq]);
list_add_tail(&g->list, &lio->glist[iq]);
@@ -1049,7 +893,7 @@ static void free_netsgbuf_with_resp(void *buf)
i++;
}
- iq = skb_iq(lio, skb);
+ iq = skb_iq(lio->oct_dev, skb);
spin_lock(&lio->glist_lock[iq]);
list_add_tail(&g->list, &lio->glist[iq]);
@@ -1059,38 +903,6 @@ static void free_netsgbuf_with_resp(void *buf)
}
/**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void if_cfg_callback(struct octeon_device *oct,
- u32 status __attribute__((unused)), void *buf)
-{
- struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
- struct liquidio_if_cfg_context *ctx;
- struct liquidio_if_cfg_resp *resp;
-
- resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
- ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
- oct = lio_get_device(ctx->octeon_id);
- if (resp->status)
- dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
- CVM_CAST64(resp->status));
- WRITE_ONCE(ctx->cond, 1);
-
- snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
- resp->cfg_info.liquidio_firmware_version);
-
- /* This barrier is required to be sure that the response has been
- * written fully before waking up the handler
- */
- wmb();
-
- wake_up_interruptible(&ctx->wc);
-}
-
-/**
* \brief Net device open for LiquidIO
* @param netdev network device
*/
@@ -1336,24 +1148,21 @@ static int liquidio_set_mac(struct net_device *netdev, void *p)
return 0;
}
-/**
- * \brief Net device get_stats
- * @param netdev network device
- */
-static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
+static void
+liquidio_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *lstats)
{
struct lio *lio = GET_LIO(netdev);
- struct net_device_stats *stats = &netdev->stats;
+ struct octeon_device *oct;
u64 pkts = 0, drop = 0, bytes = 0;
struct oct_droq_stats *oq_stats;
struct oct_iq_stats *iq_stats;
- struct octeon_device *oct;
int i, iq_no, oq_no;
oct = lio->oct_dev;
if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
- return stats;
+ return;
for (i = 0; i < oct->num_iqs; i++) {
iq_no = lio->linfo.txpciq[i].s.q_no;
@@ -1363,9 +1172,9 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
bytes += iq_stats->tx_tot_bytes;
}
- stats->tx_packets = pkts;
- stats->tx_bytes = bytes;
- stats->tx_dropped = drop;
+ lstats->tx_packets = pkts;
+ lstats->tx_bytes = bytes;
+ lstats->tx_dropped = drop;
pkts = 0;
drop = 0;
@@ -1382,11 +1191,29 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
bytes += oq_stats->rx_bytes_received;
}
- stats->rx_bytes = bytes;
- stats->rx_packets = pkts;
- stats->rx_dropped = drop;
+ lstats->rx_bytes = bytes;
+ lstats->rx_packets = pkts;
+ lstats->rx_dropped = drop;
+
+ octnet_get_link_stats(netdev);
+ lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
+
+ /* detailed rx_errors: */
+ lstats->rx_length_errors = oct->link_stats.fromwire.l2_err;
+ /* recved pkt with crc error */
+ lstats->rx_crc_errors = oct->link_stats.fromwire.fcs_err;
+ /* recv'd frame alignment error */
+ lstats->rx_frame_errors = oct->link_stats.fromwire.frame_err;
+
+ lstats->rx_errors = lstats->rx_length_errors + lstats->rx_crc_errors +
+ lstats->rx_frame_errors;
+
+ /* detailed tx_errors */
+ lstats->tx_aborted_errors = oct->link_stats.fromhost.fw_err_pko;
+ lstats->tx_carrier_errors = oct->link_stats.fromhost.fw_err_link;
- return stats;
+ lstats->tx_errors = lstats->tx_aborted_errors +
+ lstats->tx_carrier_errors;
}
/**
@@ -1580,7 +1407,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
lio = GET_LIO(netdev);
oct = lio->oct_dev;
- q_idx = skb_iq(lio, skb);
+ q_idx = skb_iq(lio->oct_dev, skb);
tag = q_idx;
iq_no = lio->linfo.txpciq[q_idx].s.q_no;
@@ -1661,8 +1488,8 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
int i, frags;
spin_lock(&lio->glist_lock[q_idx]);
- g = (struct octnic_gather *)list_delete_head(
- &lio->glist[q_idx]);
+ g = (struct octnic_gather *)
+ lio_list_delete_head(&lio->glist[q_idx]);
spin_unlock(&lio->glist_lock[q_idx]);
if (!g) {
@@ -2034,7 +1861,7 @@ static const struct net_device_ops lionetdevops = {
.ndo_open = liquidio_open,
.ndo_stop = liquidio_stop,
.ndo_start_xmit = liquidio_xmit,
- .ndo_get_stats = liquidio_get_stats,
+ .ndo_get_stats64 = liquidio_get_stats64,
.ndo_set_mac_address = liquidio_set_mac,
.ndo_set_rx_mode = liquidio_set_mcast_list,
.ndo_tx_timeout = liquidio_tx_timeout,
@@ -2156,7 +1983,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
OPCODE_NIC_IF_CFG, 0, if_cfg.u64,
0);
- sc->callback = if_cfg_callback;
+ sc->callback = lio_if_cfg_callback;
sc->callback_arg = sc;
sc->wait_time = 5000;
@@ -2273,6 +2100,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
netdev->features = (lio->dev_capability & ~NETIF_F_LRO);
netdev->hw_features = lio->dev_capability;
+ netdev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
/* MTU range: 68 - 16000 */
netdev->min_mtu = LIO_MIN_MTU_SIZE;
@@ -2321,7 +2149,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
- if (setup_glists(lio, num_iqueues)) {
+ if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
dev_err(&octeon_dev->pci_dev->dev,
"Gather list allocation failed\n");
goto setup_nic_dev_fail;
@@ -2512,7 +2340,7 @@ static int octeon_device_init(struct octeon_device *oct)
}
atomic_set(&oct->status, OCT_DEV_MBOX_SETUP_DONE);
- if (octeon_allocate_ioq_vector(oct)) {
+ if (octeon_allocate_ioq_vector(oct, oct->sriov_info.rings_per_vf)) {
dev_err(&oct->pci_dev->dev, "ioq vector allocation failed\n");
return 1;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
index 2adafa366d3f..ddd7431579f4 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
@@ -201,13 +201,14 @@ lio_vf_rep_get_stats64(struct net_device *dev,
{
struct lio_vf_rep_desc *vf_rep = netdev_priv(dev);
- stats64->tx_packets = vf_rep->stats.tx_packets;
- stats64->tx_bytes = vf_rep->stats.tx_bytes;
- stats64->tx_dropped = vf_rep->stats.tx_dropped;
-
- stats64->rx_packets = vf_rep->stats.rx_packets;
- stats64->rx_bytes = vf_rep->stats.rx_bytes;
- stats64->rx_dropped = vf_rep->stats.rx_dropped;
+ /* Swap tx and rx stats as VF rep is a switch port */
+ stats64->tx_packets = vf_rep->stats.rx_packets;
+ stats64->tx_bytes = vf_rep->stats.rx_bytes;
+ stats64->tx_dropped = vf_rep->stats.rx_dropped;
+
+ stats64->rx_packets = vf_rep->stats.tx_packets;
+ stats64->rx_bytes = vf_rep->stats.tx_bytes;
+ stats64->rx_dropped = vf_rep->stats.tx_dropped;
}
static int
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 75eea83c7cc6..026570499dcf 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -84,6 +84,7 @@ enum octeon_tag_type {
#define OPCODE_NIC_IF_CFG 0x09
#define OPCODE_NIC_VF_DRV_NOTICE 0x0A
#define OPCODE_NIC_INTRMOD_PARAMS 0x0B
+#define OPCODE_NIC_QCOUNT_UPDATE 0x12
#define OPCODE_NIC_SET_TRUSTED_VF 0x13
#define OPCODE_NIC_SYNC_OCTEON_TIME 0x14
#define VF_DRV_LOADED 1
@@ -779,23 +780,32 @@ struct liquidio_if_cfg_info {
/** Stats for each NIC port in RX direction. */
struct nic_rx_stats {
/* link-level stats */
- u64 total_rcvd;
- u64 bytes_rcvd;
- u64 total_bcst;
- u64 total_mcst;
- u64 runts;
- u64 ctl_rcvd;
- u64 fifo_err; /* Accounts for over/under-run of buffers */
- u64 dmac_drop;
- u64 fcs_err;
- u64 jabber_err;
- u64 l2_err;
- u64 frame_err;
+ u64 total_rcvd; /* Received packets */
+ u64 bytes_rcvd; /* Octets of received packets */
+ u64 total_bcst; /* Number of non-dropped L2 broadcast packets */
+ u64 total_mcst; /* Number of non-dropped L2 multicast packets */
+ u64 runts; /* Packets shorter than allowed */
+ u64 ctl_rcvd; /* Received PAUSE packets */
+ u64 fifo_err; /* Packets dropped due to RX FIFO full */
+ u64 dmac_drop; /* Packets dropped by the DMAC filter */
+ u64 fcs_err; /* Sum of fragment, overrun, and FCS errors */
+ u64 jabber_err; /* Packets larger than allowed */
+ u64 l2_err; /* Sum of DMA, parity, PCAM access, no memory,
+ * buffer overflow, malformed L2 header or
+ * length, oversize errors
+ **/
+ u64 frame_err; /* Sum of IPv4 and L4 checksum errors */
+ u64 red_drops; /* Packets dropped by RED due to buffer
+ * exhaustion
+ **/
/* firmware stats */
u64 fw_total_rcvd;
u64 fw_total_fwd;
u64 fw_total_fwd_bytes;
+ u64 fw_total_mcast;
+ u64 fw_total_bcast;
+
u64 fw_err_pko;
u64 fw_err_link;
u64 fw_err_drop;
@@ -806,11 +816,11 @@ struct nic_rx_stats {
u64 fw_lro_pkts; /* Number of packets that are LROed */
u64 fw_lro_octs; /* Number of octets that are LROed */
u64 fw_total_lro; /* Number of LRO packets formed */
- u64 fw_lro_aborts; /* Number of times lRO of packet aborted */
+ u64 fw_lro_aborts; /* Number of times LRO of packet aborted */
u64 fw_lro_aborts_port;
u64 fw_lro_aborts_seq;
u64 fw_lro_aborts_tsval;
- u64 fw_lro_aborts_timer;
+ u64 fw_lro_aborts_timer; /* Timer setting error */
/* intrmod: packet forward rate */
u64 fwd_rate;
};
@@ -818,23 +828,42 @@ struct nic_rx_stats {
/** Stats for each NIC port in RX direction. */
struct nic_tx_stats {
/* link-level stats */
- u64 total_pkts_sent;
- u64 total_bytes_sent;
- u64 mcast_pkts_sent;
- u64 bcast_pkts_sent;
- u64 ctl_sent;
- u64 one_collision_sent; /* Packets sent after one collision*/
- u64 multi_collision_sent; /* Packets sent after multiple collision*/
- u64 max_collision_fail; /* Packets not sent due to max collisions */
- u64 max_deferral_fail; /* Packets not sent due to max deferrals */
- u64 fifo_err; /* Accounts for over/under-run of buffers */
- u64 runts;
- u64 total_collisions; /* Total number of collisions detected */
+ u64 total_pkts_sent; /* Total frames sent on the interface */
+ u64 total_bytes_sent; /* Total octets sent on the interface */
+ u64 mcast_pkts_sent; /* Packets sent to the multicast DMAC */
+ u64 bcast_pkts_sent; /* Packets sent to a broadcast DMAC */
+ u64 ctl_sent; /* Control/PAUSE packets sent */
+ u64 one_collision_sent; /* Packets sent that experienced a
+ * single collision before successful
+ * transmission
+ **/
+ u64 multi_collision_sent; /* Packets sent that experienced
+ * multiple collisions before successful
+ * transmission
+ **/
+ u64 max_collision_fail; /* Packets dropped due to excessive
+ * collisions
+ **/
+ u64 max_deferral_fail; /* Packets not sent due to max
+ * deferrals
+ **/
+ u64 fifo_err; /* Packets sent that experienced a
+ * transmit underflow and were
+ * truncated
+ **/
+ u64 runts; /* Packets sent with an octet count
+ * lessthan 64
+ **/
+ u64 total_collisions; /* Packets dropped due to excessive
+ * collisions
+ **/
/* firmware stats */
u64 fw_total_sent;
u64 fw_total_fwd;
u64 fw_total_fwd_bytes;
+ u64 fw_total_mcast_sent;
+ u64 fw_total_bcast_sent;
u64 fw_err_pko;
u64 fw_err_link;
u64 fw_err_drop;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index f38abf626412..f878a552fef3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -824,23 +824,18 @@ int octeon_deregister_device(struct octeon_device *oct)
}
int
-octeon_allocate_ioq_vector(struct octeon_device *oct)
+octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs)
{
- int i, num_ioqs = 0;
struct octeon_ioq_vector *ioq_vector;
int cpu_num;
int size;
-
- if (OCTEON_CN23XX_PF(oct))
- num_ioqs = oct->sriov_info.num_pf_rings;
- else if (OCTEON_CN23XX_VF(oct))
- num_ioqs = oct->sriov_info.rings_per_vf;
+ int i;
size = sizeof(struct octeon_ioq_vector) * num_ioqs;
oct->ioq_vector = vzalloc(size);
if (!oct->ioq_vector)
- return 1;
+ return -1;
for (i = 0; i < num_ioqs; i++) {
ioq_vector = &oct->ioq_vector[i];
ioq_vector->oct_dev = oct;
@@ -856,6 +851,7 @@ octeon_allocate_ioq_vector(struct octeon_device *oct)
else
ioq_vector->ioq_num = i;
}
+
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 91937cc5c1d7..9430c0a0bb8c 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -867,7 +867,7 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type);
struct octeon_config *octeon_get_conf(struct octeon_device *oct);
void octeon_free_ioq_vector(struct octeon_device *oct);
-int octeon_allocate_ioq_vector(struct octeon_device *oct);
+int octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs);
void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq);
/* LiquidIO driver pivate flags */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 81c987682941..5fed7b63223e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -62,8 +62,8 @@ struct oct_iq_stats {
u64 tx_tot_bytes;/**< Total count of bytes sento to network. */
u64 tx_gso; /* count of tso */
u64 tx_vxlan; /* tunnel */
- u64 tx_dmamap_fail;
- u64 tx_restart;
+ u64 tx_dmamap_fail; /* Number of times dma mapping failed */
+ u64 tx_restart; /* Number of times this queue restarted */
};
#define OCT_IQ_STATS_SIZE (sizeof(struct oct_iq_stats))
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
index 28e74ee23ff8..021d99cd1665 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -24,6 +24,7 @@
#include "octeon_device.h"
#include "octeon_main.h"
#include "octeon_mailbox.h"
+#include "cn23xx_pf_device.h"
/**
* octeon_mbox_read:
@@ -205,6 +206,26 @@ int octeon_mbox_write(struct octeon_device *oct,
return ret;
}
+static void get_vf_stats(struct octeon_device *oct,
+ struct oct_vf_stats *stats)
+{
+ int i;
+
+ for (i = 0; i < oct->num_iqs; i++) {
+ if (!oct->instr_queue[i])
+ continue;
+ stats->tx_packets += oct->instr_queue[i]->stats.tx_done;
+ stats->tx_bytes += oct->instr_queue[i]->stats.tx_tot_bytes;
+ }
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ if (!oct->droq[i])
+ continue;
+ stats->rx_packets += oct->droq[i]->stats.rx_pkts_received;
+ stats->rx_bytes += oct->droq[i]->stats.rx_bytes_received;
+ }
+}
+
/**
* octeon_mbox_process_cmd:
* @mbox: Pointer mailbox
@@ -250,6 +271,15 @@ static int octeon_mbox_process_cmd(struct octeon_mbox *mbox,
mbox_cmd->msg.s.params);
break;
+ case OCTEON_GET_VF_STATS:
+ dev_dbg(&oct->pci_dev->dev, "Got VF stats request. Sending data back\n");
+ mbox_cmd->msg.s.type = OCTEON_MBOX_RESPONSE;
+ mbox_cmd->msg.s.resp_needed = 1;
+ mbox_cmd->msg.s.len = 1 +
+ sizeof(struct oct_vf_stats) / sizeof(u64);
+ get_vf_stats(oct, (struct oct_vf_stats *)mbox_cmd->data);
+ octeon_mbox_write(oct, mbox_cmd);
+ break;
default:
break;
}
@@ -322,3 +352,25 @@ int octeon_mbox_process_message(struct octeon_mbox *mbox)
return 0;
}
+
+int octeon_mbox_cancel(struct octeon_device *oct, int q_no)
+{
+ struct octeon_mbox *mbox = oct->mbox[q_no];
+ struct octeon_mbox_cmd *mbox_cmd;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&mbox->lock, flags);
+ mbox_cmd = &mbox->mbox_resp;
+
+ if (!(mbox->state & OCTEON_MBOX_STATE_RESPONSE_PENDING)) {
+ spin_unlock_irqrestore(&mbox->lock, flags);
+ return 1;
+ }
+
+ mbox->state = OCTEON_MBOX_STATE_IDLE;
+ memset(mbox_cmd, 0, sizeof(*mbox_cmd));
+ writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+ spin_unlock_irqrestore(&mbox->lock, flags);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
index 1def22afeff1..d92bd7e16477 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
@@ -25,6 +25,7 @@
#define OCTEON_VF_ACTIVE 0x1
#define OCTEON_VF_FLR_REQUEST 0x2
#define OCTEON_PF_CHANGED_VF_MACADDR 0x4
+#define OCTEON_GET_VF_STATS 0x8
/*Macro for Read acknowldgement*/
#define OCTEON_PFVFACK 0xffffffffffffffffULL
@@ -107,9 +108,15 @@ struct octeon_mbox {
};
+struct oct_vf_stats_ctx {
+ atomic_t status;
+ struct oct_vf_stats *stats;
+};
+
int octeon_mbox_read(struct octeon_mbox *mbox);
int octeon_mbox_write(struct octeon_device *oct,
struct octeon_mbox_cmd *mbox_cmd);
int octeon_mbox_process_message(struct octeon_mbox *mbox);
+int octeon_mbox_cancel(struct octeon_device *oct, int q_no);
#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 4069710796a8..8571f11e3c8f 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -47,6 +47,29 @@ struct liquidio_if_cfg_resp {
u64 status;
};
+#define LIO_IFCFG_WAIT_TIME 3000 /* In milli seconds */
+
+/* Structure of a node in list of gather components maintained by
+ * NIC driver for each network device.
+ */
+struct octnic_gather {
+ /* List manipulation. Next and prev pointers. */
+ struct list_head list;
+
+ /* Size of the gather component at sg in bytes. */
+ int sg_size;
+
+ /* Number of bytes that sg was adjusted to make it 8B-aligned. */
+ int adjust;
+
+ /* Gather component that can accommodate max sized fragment list
+ * received from the IP layer.
+ */
+ struct octeon_sg_entry *sg;
+
+ dma_addr_t sg_dma_ptr;
+};
+
struct oct_nic_stats_resp {
u64 rh;
struct oct_link_stats stats;
@@ -190,6 +213,8 @@ irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)),
int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
+int octnet_get_link_stats(struct net_device *netdev);
+
int lio_wait_for_clean_oq(struct octeon_device *oct);
/**
* \brief Register ethtool operations
@@ -197,6 +222,14 @@ int lio_wait_for_clean_oq(struct octeon_device *oct);
*/
void liquidio_set_ethtool_ops(struct net_device *netdev);
+void lio_if_cfg_callback(struct octeon_device *oct,
+ u32 status __attribute__((unused)),
+ void *buf);
+
+void lio_delete_glists(struct lio *lio);
+
+int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_qs);
+
/**
* \brief Net device change_mtu
* @param netdev network device
@@ -515,7 +548,7 @@ static inline void stop_txqs(struct net_device *netdev)
{
int i;
- for (i = 0; i < netdev->num_tx_queues; i++)
+ for (i = 0; i < netdev->real_num_tx_queues; i++)
netif_stop_subqueue(netdev, i);
}
@@ -528,7 +561,7 @@ static inline void wake_txqs(struct net_device *netdev)
struct lio *lio = GET_LIO(netdev);
int i, qno;
- for (i = 0; i < netdev->num_tx_queues; i++) {
+ for (i = 0; i < netdev->real_num_tx_queues; i++) {
qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs].s.q_no;
if (__netif_subqueue_stopped(netdev, i)) {
@@ -549,14 +582,33 @@ static inline void start_txqs(struct net_device *netdev)
int i;
if (lio->linfo.link.s.link_up) {
- for (i = 0; i < netdev->num_tx_queues; i++)
+ for (i = 0; i < netdev->real_num_tx_queues; i++)
netif_start_subqueue(netdev, i);
}
}
-static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+static inline int skb_iq(struct octeon_device *oct, struct sk_buff *skb)
{
- return skb->queue_mapping % lio->linfo.num_txpciq;
+ return skb->queue_mapping % oct->num_iqs;
+}
+
+/**
+ * Remove the node at the head of the list. The list would be empty at
+ * the end of this call if there are no more nodes in the list.
+ */
+static inline struct list_head *lio_list_delete_head(struct list_head *root)
+{
+ struct list_head *node;
+
+ if (root->prev == root && root->next == root)
+ node = NULL;
+ else
+ node = root->next;
+
+ if (node)
+ list_del(node);
+
+ return node;
}
#endif
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 707db3304396..7135db45927e 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
action = bpf_prog_run_xdp(prog, &xdp);
rcu_read_unlock();
+ len = xdp.data_end - xdp.data;
/* Check if XDP program has changed headers */
if (orig_data != xdp.data) {
- len = xdp.data_end - xdp.data;
offset = orig_data - xdp.data;
dma_addr -= offset;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index db92f1858060..aae980205ece 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -64,8 +64,7 @@ static int set_tcb_field(struct adapter *adap, struct filter_entry *f,
if (!skb)
return -ENOMEM;
- req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ req = (struct cpl_set_tcb_field *)__skb_put_zero(skb, sizeof(*req));
INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid);
req->reply_ctrl = htons(REPLY_CHAN_V(0) |
QUEUENO_V(adap->sge.fw_evtq.abs_id) |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c
index 6228a5708307..82b70a565e24 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/srq.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.c
@@ -84,8 +84,7 @@ int cxgb4_get_srq_entry(struct net_device *dev,
if (!skb)
return -ENOMEM;
req = (struct cpl_srq_table_req *)
- __skb_put(skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ __skb_put_zero(skb, sizeof(*req));
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ,
TID_TID_V(srq_idx) |
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index 51b18035d691..90b5274a8ba1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -145,6 +145,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */
CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */
CH_PCI_ID_TABLE_FENTRY(0x5018), /* T540-BT */
+ CH_PCI_ID_TABLE_FENTRY(0x5019), /* T540-LP-BT */
+ CH_PCI_ID_TABLE_FENTRY(0x501a), /* T540-SO-BT */
+ CH_PCI_ID_TABLE_FENTRY(0x501b), /* T540-SO-CR */
CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */
CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */
CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index 123e2c1b65f5..4eb15ceddca3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -36,8 +36,8 @@
#define __T4FW_VERSION_H__
#define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x10
-#define T4FW_VERSION_MICRO 0x3F
+#define T4FW_VERSION_MINOR 0x13
+#define T4FW_VERSION_MICRO 0x01
#define T4FW_VERSION_BUILD 0x00
#define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
#define T4FW_MIN_VERSION_MICRO 0x00
#define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x10
-#define T5FW_VERSION_MICRO 0x3F
+#define T5FW_VERSION_MINOR 0x13
+#define T5FW_VERSION_MICRO 0x01
#define T5FW_VERSION_BUILD 0x00
#define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
#define T5FW_MIN_VERSION_MICRO 0x00
#define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x10
-#define T6FW_VERSION_MICRO 0x3F
+#define T6FW_VERSION_MINOR 0x13
+#define T6FW_VERSION_MICRO 0x01
#define T6FW_VERSION_BUILD 0x00
#define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 9a81b52307a9..71f13bd2b5e4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1419,6 +1419,22 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev,
base->duplex = DUPLEX_UNKNOWN;
}
+ if (pi->link_cfg.fc & PAUSE_RX) {
+ if (pi->link_cfg.fc & PAUSE_TX) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Pause);
+ } else {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Asym_Pause);
+ }
+ } else if (pi->link_cfg.fc & PAUSE_TX) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Asym_Pause);
+ }
+
base->autoneg = pi->link_cfg.autoneg;
if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
ethtool_link_ksettings_add_link_mode(link_ksettings,
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
index 4b5aacc09cab..240ba9d4c399 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
@@ -90,8 +90,7 @@ cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
{
struct cpl_tid_release *req;
- req = __skb_put(skb, len);
- memset(req, 0, len);
+ req = __skb_put_zero(skb, len);
INIT_TP_WR(req, tid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
@@ -104,8 +103,7 @@ cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
{
struct cpl_close_con_req *req;
- req = __skb_put(skb, len);
- memset(req, 0, len);
+ req = __skb_put_zero(skb, len);
INIT_TP_WR(req, tid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
@@ -119,8 +117,7 @@ cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
{
struct cpl_abort_req *req;
- req = __skb_put(skb, len);
- memset(req, 0, len);
+ req = __skb_put_zero(skb, len);
INIT_TP_WR(req, tid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
@@ -134,8 +131,7 @@ cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
{
struct cpl_abort_rpl *rpl;
- rpl = __skb_put(skb, len);
- memset(rpl, 0, len);
+ rpl = __skb_put_zero(skb, len);
INIT_TP_WR(rpl, tid);
OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
@@ -149,8 +145,7 @@ cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
{
struct cpl_rx_data_ack *req;
- req = __skb_put(skb, len);
- memset(req, 0, len);
+ req = __skb_put_zero(skb, len);
INIT_TP_WR(req, tid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index 6552d68ea6e1..ce6e24c74978 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1391,12 +1391,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params)
/* FM_WRONG_RESET_VALUES_ERRATA_FMAN_A005127 Errata
* workaround
*/
- if (port->rev_info.major >= 6) {
- u32 reg;
+ u32 reg;
- reg = 0x00001013;
- iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp);
- }
+ reg = 0x00001013;
+ iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp);
}
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 8c55965a66ac..c4e295094da7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -502,7 +502,7 @@ static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto,
/* find outer header point */
l3.hdr = skb_network_header(skb);
- l4_hdr = skb_inner_transport_header(skb);
+ l4_hdr = skb_transport_header(skb);
if (skb->protocol == htons(ETH_P_IPV6)) {
exthdr = l3.hdr + sizeof(*l3.v6);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index ff13d1876d9e..fab70683bbf7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -31,6 +31,17 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring)
return ring->desc_num - used - 1;
}
+static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h)
+{
+ int u = ring->next_to_use;
+ int c = ring->next_to_clean;
+
+ if (unlikely(h >= ring->desc_num))
+ return 0;
+
+ return u > c ? (h > c && h <= u) : (h > c || h <= u);
+}
+
static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
{
int size = ring->desc_num * sizeof(struct hclge_desc);
@@ -141,6 +152,7 @@ static void hclge_cmd_init_regs(struct hclge_hw *hw)
static int hclge_cmd_csq_clean(struct hclge_hw *hw)
{
+ struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
struct hclge_cmq_ring *csq = &hw->cmq.csq;
u16 ntc = csq->next_to_clean;
struct hclge_desc *desc;
@@ -149,6 +161,13 @@ static int hclge_cmd_csq_clean(struct hclge_hw *hw)
desc = &csq->desc[ntc];
head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
+ rmb(); /* Make sure head is ready before touch any data */
+
+ if (!is_valid_csq_clean_head(csq, head)) {
+ dev_warn(&hdev->pdev->dev, "wrong head (%d, %d-%d)\n", head,
+ csq->next_to_use, csq->next_to_clean);
+ return 0;
+ }
while (head != ntc) {
memset(desc, 0, sizeof(*desc));
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 2066dd734444..dd5d65c9cca6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -304,8 +304,6 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = {
HCLGE_MAC_STATS_FIELD_OFF(mac_tx_2048_4095_oct_pkt_num)},
{"mac_tx_4096_8191_oct_pkt_num",
HCLGE_MAC_STATS_FIELD_OFF(mac_tx_4096_8191_oct_pkt_num)},
- {"mac_tx_8192_12287_oct_pkt_num",
- HCLGE_MAC_STATS_FIELD_OFF(mac_tx_8192_12287_oct_pkt_num)},
{"mac_tx_8192_9216_oct_pkt_num",
HCLGE_MAC_STATS_FIELD_OFF(mac_tx_8192_9216_oct_pkt_num)},
{"mac_tx_9217_12287_oct_pkt_num",
@@ -356,8 +354,6 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = {
HCLGE_MAC_STATS_FIELD_OFF(mac_rx_2048_4095_oct_pkt_num)},
{"mac_rx_4096_8191_oct_pkt_num",
HCLGE_MAC_STATS_FIELD_OFF(mac_rx_4096_8191_oct_pkt_num)},
- {"mac_rx_8192_12287_oct_pkt_num",
- HCLGE_MAC_STATS_FIELD_OFF(mac_rx_8192_12287_oct_pkt_num)},
{"mac_rx_8192_9216_oct_pkt_num",
HCLGE_MAC_STATS_FIELD_OFF(mac_rx_8192_9216_oct_pkt_num)},
{"mac_rx_9217_12287_oct_pkt_num",
@@ -1459,8 +1455,11 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
/* We need to alloc a vport for main NIC of PF */
num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
- if (hdev->num_tqps < num_vport)
- num_vport = hdev->num_tqps;
+ if (hdev->num_tqps < num_vport) {
+ dev_err(&hdev->pdev->dev, "tqps(%d) is less than vports(%d)",
+ hdev->num_tqps, num_vport);
+ return -EINVAL;
+ }
/* Alloc the same number of TQPs for every vport */
tqp_per_vport = hdev->num_tqps / num_vport;
@@ -4540,8 +4539,9 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, enable);
}
-int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
- bool is_kill, u16 vlan, u8 qos, __be16 proto)
+static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
+ bool is_kill, u16 vlan, u8 qos,
+ __be16 proto)
{
#define HCLGE_MAX_VF_BYTES 16
struct hclge_vlan_filter_vf_cfg_cmd *req0;
@@ -4599,12 +4599,9 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
return -EIO;
}
-static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
- __be16 proto, u16 vlan_id,
- bool is_kill)
+static int hclge_set_port_vlan_filter(struct hclge_dev *hdev, __be16 proto,
+ u16 vlan_id, bool is_kill)
{
- struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
struct hclge_vlan_filter_pf_cfg_cmd *req;
struct hclge_desc desc;
u8 vlan_offset_byte_val;
@@ -4624,22 +4621,66 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "port vlan command, send fail, ret =%d.\n", ret);
+ return ret;
+}
+
+static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
+ u16 vport_id, u16 vlan_id, u8 qos,
+ bool is_kill)
+{
+ u16 vport_idx, vport_num = 0;
+ int ret;
+
+ ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id,
+ 0, proto);
if (ret) {
dev_err(&hdev->pdev->dev,
- "port vlan command, send fail, ret =%d.\n",
- ret);
+ "Set %d vport vlan filter config fail, ret =%d.\n",
+ vport_id, ret);
return ret;
}
- ret = hclge_set_vf_vlan_common(hdev, 0, is_kill, vlan_id, 0, proto);
- if (ret) {
+ /* vlan 0 may be added twice when 8021q module is enabled */
+ if (!is_kill && !vlan_id &&
+ test_bit(vport_id, hdev->vlan_table[vlan_id]))
+ return 0;
+
+ if (!is_kill && test_and_set_bit(vport_id, hdev->vlan_table[vlan_id])) {
dev_err(&hdev->pdev->dev,
- "Set pf vlan filter config fail, ret =%d.\n",
- ret);
- return -EIO;
+ "Add port vlan failed, vport %d is already in vlan %d\n",
+ vport_id, vlan_id);
+ return -EINVAL;
}
- return 0;
+ if (is_kill &&
+ !test_and_clear_bit(vport_id, hdev->vlan_table[vlan_id])) {
+ dev_err(&hdev->pdev->dev,
+ "Delete port vlan failed, vport %d is not in vlan %d\n",
+ vport_id, vlan_id);
+ return -EINVAL;
+ }
+
+ for_each_set_bit(vport_idx, hdev->vlan_table[vlan_id], VLAN_N_VID)
+ vport_num++;
+
+ if ((is_kill && vport_num == 0) || (!is_kill && vport_num == 1))
+ ret = hclge_set_port_vlan_filter(hdev, proto, vlan_id,
+ is_kill);
+
+ return ret;
+}
+
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+ u16 vlan_id, bool is_kill)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ return hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, vlan_id,
+ 0, is_kill);
}
static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
@@ -4653,7 +4694,7 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
if (proto != htons(ETH_P_8021Q))
return -EPROTONOSUPPORT;
- return hclge_set_vf_vlan_common(hdev, vfid, false, vlan, qos, proto);
+ return hclge_set_vlan_filter_hw(hdev, proto, vfid, vlan, qos, false);
}
static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
@@ -4818,7 +4859,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
}
handle = &hdev->vport[0].nic;
- return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
+ return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
}
static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
@@ -5166,12 +5207,6 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
struct phy_device *phydev = hdev->hw.mac.phydev;
u32 fc_autoneg;
- /* Only support flow control negotiation for netdev with
- * phy attached for now.
- */
- if (!phydev)
- return -EOPNOTSUPP;
-
fc_autoneg = hclge_get_autoneg(handle);
if (auto_neg != fc_autoneg) {
dev_info(&hdev->pdev->dev,
@@ -5190,6 +5225,12 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
if (!fc_autoneg)
return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
+ /* Only support flow control negotiation for netdev with
+ * phy attached for now.
+ */
+ if (!phydev)
+ return -EOPNOTSUPP;
+
return phy_start_aneg(phydev);
}
@@ -5427,7 +5468,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
if (!hdev) {
ret = -ENOMEM;
- goto err_hclge_dev;
+ goto out;
}
hdev->pdev = pdev;
@@ -5440,38 +5481,38 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
ret = hclge_pci_init(hdev);
if (ret) {
dev_err(&pdev->dev, "PCI init failed\n");
- goto err_pci_init;
+ goto out;
}
/* Firmware command queue initialize */
ret = hclge_cmd_queue_init(hdev);
if (ret) {
dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret);
- return ret;
+ goto err_pci_uninit;
}
/* Firmware command initialize */
ret = hclge_cmd_init(hdev);
if (ret)
- goto err_cmd_init;
+ goto err_cmd_uninit;
ret = hclge_get_cap(hdev);
if (ret) {
dev_err(&pdev->dev, "get hw capability error, ret = %d.\n",
ret);
- return ret;
+ goto err_cmd_uninit;
}
ret = hclge_configure(hdev);
if (ret) {
dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
- return ret;
+ goto err_cmd_uninit;
}
ret = hclge_init_msi(hdev);
if (ret) {
dev_err(&pdev->dev, "Init MSI/MSI-X error, ret = %d.\n", ret);
- return ret;
+ goto err_cmd_uninit;
}
ret = hclge_misc_irq_init(hdev);
@@ -5479,69 +5520,71 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
dev_err(&pdev->dev,
"Misc IRQ(vector0) init error, ret = %d.\n",
ret);
- return ret;
+ goto err_msi_uninit;
}
ret = hclge_alloc_tqps(hdev);
if (ret) {
dev_err(&pdev->dev, "Allocate TQPs error, ret = %d.\n", ret);
- return ret;
+ goto err_msi_irq_uninit;
}
ret = hclge_alloc_vport(hdev);
if (ret) {
dev_err(&pdev->dev, "Allocate vport error, ret = %d.\n", ret);
- return ret;
+ goto err_msi_irq_uninit;
}
ret = hclge_map_tqp(hdev);
if (ret) {
dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret);
- return ret;
+ goto err_sriov_disable;
}
- ret = hclge_mac_mdio_config(hdev);
- if (ret) {
- dev_warn(&hdev->pdev->dev,
- "mdio config fail ret=%d\n", ret);
- return ret;
+ if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+ ret = hclge_mac_mdio_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mdio config fail ret=%d\n", ret);
+ goto err_sriov_disable;
+ }
}
ret = hclge_mac_init(hdev);
if (ret) {
dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
- return ret;
+ goto err_mdiobus_unreg;
}
ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
if (ret) {
dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
- return ret;
+ goto err_mdiobus_unreg;
}
ret = hclge_init_vlan_config(hdev);
if (ret) {
dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
- return ret;
+ goto err_mdiobus_unreg;
}
ret = hclge_tm_schd_init(hdev);
if (ret) {
dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
- return ret;
+ goto err_mdiobus_unreg;
}
hclge_rss_init_cfg(hdev);
ret = hclge_rss_init_hw(hdev);
if (ret) {
dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
- return ret;
+ goto err_mdiobus_unreg;
}
ret = init_mgr_tbl(hdev);
if (ret) {
dev_err(&pdev->dev, "manager table init fail, ret =%d\n", ret);
- return ret;
+ goto err_mdiobus_unreg;
}
hclge_dcb_ops_set(hdev);
@@ -5564,11 +5607,24 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
return 0;
-err_cmd_init:
+err_mdiobus_unreg:
+ if (hdev->hw.mac.phydev)
+ mdiobus_unregister(hdev->hw.mac.mdio_bus);
+err_sriov_disable:
+ if (IS_ENABLED(CONFIG_PCI_IOV))
+ hclge_disable_sriov(hdev);
+err_msi_irq_uninit:
+ hclge_misc_irq_uninit(hdev);
+err_msi_uninit:
+ pci_free_irq_vectors(pdev);
+err_cmd_uninit:
+ hclge_destroy_cmd_queue(&hdev->hw);
+err_pci_uninit:
+ pci_clear_master(pdev);
pci_release_regions(pdev);
-err_pci_init:
+ pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
-err_hclge_dev:
+out:
return ret;
}
@@ -5586,6 +5642,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
set_bit(HCLGE_STATE_DOWN, &hdev->state);
hclge_stats_clear(hdev);
+ memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table));
ret = hclge_cmd_init(hdev);
if (ret) {
@@ -6203,7 +6260,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.get_fw_version = hclge_get_fw_version,
.get_mdix_mode = hclge_get_mdix_mode,
.enable_vlan_filter = hclge_enable_vlan_filter,
- .set_vlan_filter = hclge_set_port_vlan_filter,
+ .set_vlan_filter = hclge_set_vlan_filter,
.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
.enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag,
.reset_event = hclge_reset_event,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 0f4157e71282..b7ee91daea0c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -12,6 +12,8 @@
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/phy.h>
+#include <linux/if_vlan.h>
+
#include "hclge_cmd.h"
#include "hnae3.h"
@@ -406,9 +408,9 @@ struct hclge_mac_stats {
u64 mac_tx_1519_2047_oct_pkt_num;
u64 mac_tx_2048_4095_oct_pkt_num;
u64 mac_tx_4096_8191_oct_pkt_num;
- u64 mac_tx_8192_12287_oct_pkt_num; /* valid for GE MAC only */
- u64 mac_tx_8192_9216_oct_pkt_num; /* valid for LGE & CGE MAC only */
- u64 mac_tx_9217_12287_oct_pkt_num; /* valid for LGE & CGE MAC */
+ u64 rsv0;
+ u64 mac_tx_8192_9216_oct_pkt_num;
+ u64 mac_tx_9217_12287_oct_pkt_num;
u64 mac_tx_12288_16383_oct_pkt_num;
u64 mac_tx_1519_max_good_oct_pkt_num;
u64 mac_tx_1519_max_bad_oct_pkt_num;
@@ -433,9 +435,9 @@ struct hclge_mac_stats {
u64 mac_rx_1519_2047_oct_pkt_num;
u64 mac_rx_2048_4095_oct_pkt_num;
u64 mac_rx_4096_8191_oct_pkt_num;
- u64 mac_rx_8192_12287_oct_pkt_num;/* valid for GE MAC only */
- u64 mac_rx_8192_9216_oct_pkt_num; /* valid for LGE & CGE MAC only */
- u64 mac_rx_9217_12287_oct_pkt_num; /* valid for LGE & CGE MAC only */
+ u64 rsv1;
+ u64 mac_rx_8192_9216_oct_pkt_num;
+ u64 mac_rx_9217_12287_oct_pkt_num;
u64 mac_rx_12288_16383_oct_pkt_num;
u64 mac_rx_1519_max_good_oct_pkt_num;
u64 mac_rx_1519_max_bad_oct_pkt_num;
@@ -471,6 +473,7 @@ struct hclge_vlan_type_cfg {
u16 tx_in_vlan_type;
};
+#define HCLGE_VPORT_NUM 256
struct hclge_dev {
struct pci_dev *pdev;
struct hnae3_ae_dev *ae_dev;
@@ -562,6 +565,7 @@ struct hclge_dev {
u64 rx_pkts_for_led;
u64 tx_pkts_for_led;
+ unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)];
};
/* VPort level vlan tag configuration for TX direction */
@@ -646,8 +650,8 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue)
}
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
-int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
- bool is_kill, u16 vlan, u8 qos, __be16 proto);
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+ u16 vlan_id, bool is_kill);
int hclge_buffer_alloc(struct hclge_dev *hdev);
int hclge_rss_init_hw(struct hclge_dev *hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index a6f7ffa9c259..7563335b0c7f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -264,19 +264,18 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
struct hclge_mbx_vf_to_pf_cmd *mbx_req,
bool gen_resp)
{
- struct hclge_dev *hdev = vport->back;
int status = 0;
if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) {
+ struct hnae3_handle *handle = &vport->nic;
u16 vlan, proto;
bool is_kill;
is_kill = !!mbx_req->msg[2];
memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan));
memcpy(&proto, &mbx_req->msg[5], sizeof(proto));
- status = hclge_set_vf_vlan_common(hdev, vport->vport_id,
- is_kill, vlan, 0,
- cpu_to_be16(proto));
+ status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
+ vlan, is_kill);
}
if (gen_resp)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 682c2d6618e7..9f7932e423b5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -140,8 +140,11 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev)
struct mii_bus *mdio_bus;
int ret;
- if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR)
- return 0;
+ if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
+ dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
+ hdev->hw.mac.phy_addr);
+ return -EINVAL;
+ }
mdio_bus = devm_mdiobus_alloc(&hdev->pdev->dev);
if (!mdio_bus)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 885f25cd7be4..c69ecab460f9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -134,11 +134,8 @@ static int hclge_pfc_stats_get(struct hclge_dev *hdev,
}
ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Get pfc pause stats fail, ret = %d.\n", ret);
+ if (ret)
return ret;
- }
for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
struct hclge_pfc_stats_cmd *pfc_stats =
diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig
index 08db24954f7e..e4e8b24c1a5d 100644
--- a/drivers/net/ethernet/huawei/hinic/Kconfig
+++ b/drivers/net/ethernet/huawei/hinic/Kconfig
@@ -4,7 +4,7 @@
config HINIC
tristate "Huawei Intelligent PCIE Network Interface Card"
- depends on (PCI_MSI && X86)
+ depends on (PCI_MSI && (X86 || ARM64))
---help---
This driver supports HiNIC PCIE Ethernet cards.
To compile this driver as part of the kernel, choose Y here.
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 41ad56edfb96..27d5f27163d2 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel PRO/100 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
/*
* e100.c: Intel(R) PRO/100 ethernet driver
diff --git a/drivers/net/ethernet/intel/e1000/Makefile b/drivers/net/ethernet/intel/e1000/Makefile
index c7caadd3c8af..314c52d44b7c 100644
--- a/drivers/net/ethernet/intel/e1000/Makefile
+++ b/drivers/net/ethernet/intel/e1000/Makefile
@@ -1,31 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel PRO/1000 Linux driver
# Copyright(c) 1999 - 2006 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
#
# Makefile for the Intel(R) PRO/1000 ethernet driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
index 3a0feea2df54..c40729b2c184 100644
--- a/drivers/net/ethernet/intel/e1000/e1000.h
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -1,32 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/1000 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
/* Linux PRO/1000 Ethernet Driver main header file */
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 3e80ca170dd7..5d365a986bb0 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- * Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2006 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
/* ethtool support for e1000 */
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 6e7e923d57bf..48428d6a00be 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-*
- Intel PRO/1000 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
- */
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
/* e1000_hw.c
* Shared functions for accessing and configuring the MAC
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.h b/drivers/net/ethernet/intel/e1000/e1000_hw.h
index f09c569ec19b..b57a04954ccf 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.h
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/1000 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
/* e1000_hw.h
* Structures, enums, and macros for the MAC
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index d5eb19b86a0a..2110d5f2da19 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel PRO/1000 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
#include "e1000.h"
#include <net/ip6_checksum.h>
diff --git a/drivers/net/ethernet/intel/e1000/e1000_osdep.h b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
index ae0559b8b011..e966bb290797 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_osdep.h
+++ b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
@@ -1,32 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/1000 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
/* glue for the OS independent part of e1000
* includes register access macros
diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c
index 345f23927bcc..d3f29ffe1e47 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_param.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_param.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel PRO/1000 Linux driver
- Copyright(c) 1999 - 2006 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
#include "e1000.h"
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index 953e99df420c..257bd59bc9c6 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* 80003ES2LAN Gigabit Ethernet Controller (Copper)
* 80003ES2LAN Gigabit Ethernet Controller (Serdes)
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.h b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
index ee6d1256fda4..aa9d639c6cbb 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.h
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_80003ES2LAN_H_
#define _E1000E_80003ES2LAN_H_
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 924f2c8dfa6c..b9309302c29e 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* 82571EB Gigabit Ethernet Controller
* 82571EB Gigabit Ethernet Controller (Copper)
diff --git a/drivers/net/ethernet/intel/e1000e/82571.h b/drivers/net/ethernet/intel/e1000e/82571.h
index 9a24c645f726..834c238d02db 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.h
+++ b/drivers/net/ethernet/intel/e1000e/82571.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_82571_H_
#define _E1000E_82571_H_
diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile
index 24e391a4ac68..44e58b6e7660 100644
--- a/drivers/net/ethernet/intel/e1000e/Makefile
+++ b/drivers/net/ethernet/intel/e1000e/Makefile
@@ -1,30 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel PRO/1000 Linux driver
-# Copyright(c) 1999 - 2014 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 1999 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) PRO/1000 ethernet driver
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 22883015a695..fd550dee4982 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000_DEFINES_H_
#define _E1000_DEFINES_H_
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index da88555ba1fd..c760dc72c520 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* Linux PRO/1000 Ethernet Driver main header file */
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 64dc0c11147f..e084cb734eb1 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* ethtool support for e1000 */
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 21802396bed6..eff75bd8a8f0 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000_HW_H_
#define _E1000_HW_H_
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 1551d6ce5341..cdae0efde8e6 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* 82562G 10/100 Network Connection
* 82562G-2 10/100 Network Connection
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 3c4f82c21084..eb09c755fa17 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_ICH8LAN_H_
#define _E1000E_ICH8LAN_H_
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index b293464a9f27..4abd55d646c5 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "e1000.h"
diff --git a/drivers/net/ethernet/intel/e1000e/mac.h b/drivers/net/ethernet/intel/e1000e/mac.h
index cb0abf6c76a5..6ab261119801 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.h
+++ b/drivers/net/ethernet/intel/e1000e/mac.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_MAC_H_
#define _E1000E_MAC_H_
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index e027660aeb92..c4c9b20bc51f 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "e1000.h"
diff --git a/drivers/net/ethernet/intel/e1000e/manage.h b/drivers/net/ethernet/intel/e1000e/manage.h
index 3268f2e58593..d868aad806d4 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.h
+++ b/drivers/net/ethernet/intel/e1000e/manage.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_MANAGE_H_
#define _E1000E_MANAGE_H_
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index ec4a9759a6f2..d3fef7fefea8 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index 68949bb41b7b..937f9af22d26 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "e1000.h"
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.h b/drivers/net/ethernet/intel/e1000e/nvm.h
index 8e082028be7d..6a30dfea4117 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.h
+++ b/drivers/net/ethernet/intel/e1000e/nvm.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_NVM_H_
#define _E1000E_NVM_H_
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index 2def33eba9e6..098369fd3e65 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/netdevice.h>
#include <linux/module.h>
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index b8226ed0e338..42233019255a 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "e1000.h"
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
index d4180b5e9196..c48777d09523 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.h
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_PHY_H_
#define _E1000E_PHY_H_
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index f941e5085f44..37c76945ad9b 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -1,24 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* PTP 1588 Hardware Clock (PHC)
* Derived from PTP Hardware Clock driver for Intel 82576 and 82580 (igb)
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 16afc3c2a986..47f5ca793970 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -1,24 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel PRO/1000 Linux driver
- * Copyright(c) 1999 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000E_REGS_H_
#define _E1000E_REGS_H_
diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile
index 93277cb99cb7..26a9746ccb14 100644
--- a/drivers/net/ethernet/intel/fm10k/Makefile
+++ b/drivers/net/ethernet/intel/fm10k/Makefile
@@ -1,26 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel(R) Ethernet Switch Host Interface Driver
-# Copyright(c) 2013 - 2016 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) Ethernet Switch Host Interface Driver
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index a9cdf763c59d..a903a0ba45e1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_H_
#define _FM10K_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index e303d88720ef..f51a63fca513 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k_common.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
index 2bdb24d2ca9d..4c48fb73b3e7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_COMMON_H_
#define _FM10K_COMMON_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index c4f733452ef2..20768ac7f17e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 43e8d839831f..dca104121c05 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index 28b6b4e56487..eeac2b75a195 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/vmalloc.h>
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 30395f5e5e87..e707d717012f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k.h"
#include "fm10k_vf.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index df8607097e4a..3f536541f45f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/types.h>
#include <linux/module.h>
@@ -445,15 +427,14 @@ static void fm10k_type_trans(struct fm10k_ring *rx_ring,
l2_accel = NULL;
}
- skb->protocol = eth_type_trans(skb, dev);
-
/* Record Rx queue, or update macvlan statistics */
if (!l2_accel)
skb_record_rx_queue(skb, rx_ring->queue_index);
else
macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
- (skb->pkt_type == PACKET_BROADCAST) ||
- (skb->pkt_type == PACKET_MULTICAST));
+ false);
+
+ skb->protocol = eth_type_trans(skb, dev);
}
/**
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index c01bf30a0c9e..21021fe4f1c3 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k_common.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
index 007e1dfa9b7a..56d1abff04e2 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_MBX_H_
#define _FM10K_MBX_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 45793491d4ba..c879af72bbf5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,27 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k.h"
#include <linux/vmalloc.h>
#include <net/udp_tunnel.h>
+#include <linux/if_macvlan.h>
/**
* fm10k_setup_tx_resources - allocate Tx resources (Descriptors)
@@ -1254,7 +1237,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
glort = l2_accel->dglort + 1 + i;
hw->mac.ops.update_xcast_mode(hw, glort,
- FM10K_XCAST_MODE_MULTI);
+ FM10K_XCAST_MODE_NONE);
fm10k_queue_mac_request(interface, glort,
sdev->dev_addr,
hw->mac.default_vid, true);
@@ -1449,6 +1432,13 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
int size = 0, i;
u16 glort;
+ /* The hardware supported by fm10k only filters on the destination MAC
+ * address. In order to avoid issues we only support offloading modes
+ * where the hardware can actually provide the functionality.
+ */
+ if (!macvlan_supports_dest_filter(sdev))
+ return ERR_PTR(-EMEDIUMTYPE);
+
/* allocate l2 accel structure if it is not available */
if (!l2_accel) {
/* verify there is enough free GLORTs to support l2_accel */
@@ -1515,7 +1505,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
if (fm10k_host_mbx_ready(interface)) {
hw->mac.ops.update_xcast_mode(hw, glort,
- FM10K_XCAST_MODE_MULTI);
+ FM10K_XCAST_MODE_NONE);
fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
hw->mac.default_vid, true);
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index c4a2b688b38b..15071e4adb98 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/module.h>
#include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 7ba54c534f8c..8f0a99b6a537 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k_pf.h"
#include "fm10k_vf.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index ae81f9a16602..8e814df709d2 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_PF_H_
#define _FM10K_PF_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index 725ecb7abccd..2a7a40bf2b1c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k_tlv.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
index 5d2ee759507e..160bc5b78f99 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_TLV_H_
#define _FM10K_TLV_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index dd23af11e2c1..3e608e493f9d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_TYPE_H_
#define _FM10K_TYPE_H_
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index f06913630b39..a8519c1f0406 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -1,23 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "fm10k_vf.h"
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
index 66a66b73a2f1..787d0d570a28 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
@@ -1,23 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _FM10K_VF_H_
#define _FM10K_VF_H_
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 75437768a07c..14397e7e9925 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -1,29 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel Ethernet Controller XL710 Family Linux Driver
-# Copyright(c) 2013 - 2015 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) Ethernet Connection XL710 (i40e.ko) driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index a44139c1de80..7a80652e2500 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_H_
#define _I40E_H_
@@ -334,10 +310,12 @@ struct i40e_tc_configuration {
struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
};
+#define I40E_UDP_PORT_INDEX_UNUSED 255
struct i40e_udp_port_config {
/* AdminQ command interface expects port number in Host byte order */
u16 port;
u8 type;
+ u8 filter_index;
};
/* macros related to FLX_PIT */
@@ -608,7 +586,7 @@ struct i40e_pf {
unsigned long ptp_tx_start;
struct hwtstamp_config tstamp_config;
struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
- u64 ptp_base_adj;
+ u32 ptp_adj_mult;
u32 tx_hwtstamp_timeouts;
u32 tx_hwtstamp_skipped;
u32 rx_hwtstamp_cleared;
@@ -1009,6 +987,9 @@ void i40e_service_event_schedule(struct i40e_pf *pf);
void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
u8 *msg, u16 len);
+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, bool is_xdp,
+ bool enable);
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable);
int i40e_vsi_start_rings(struct i40e_vsi *vsi);
void i40e_vsi_stop_rings(struct i40e_vsi *vsi);
void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 843fc7781ef8..ddbea79d18e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_status.h"
#include "i40e_type.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 0a8749ee9fd3..edec3df78971 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_ADMINQ_H_
#define _I40E_ADMINQ_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 0244923edeb8..7d888e05f96f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_ADMINQ_CMD_H_
#define _I40E_ADMINQ_CMD_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
index abed0c52e782..cb8689222c8b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_ALLOC_H_
#define _I40E_ALLOC_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index d8ce4999864f..5f3b8b9ff511 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/list.h>
#include <linux/errno.h>
@@ -64,7 +40,7 @@ static struct i40e_ops i40e_lan_ops = {
/**
* i40e_client_get_params - Get the params that can change at runtime
* @vsi: the VSI with the message
- * @param: clinet param struct
+ * @params: client param struct
*
**/
static
@@ -590,7 +566,7 @@ static int i40e_client_virtchnl_send(struct i40e_info *ldev,
* i40e_client_setup_qvlist
* @ldev: pointer to L2 context.
* @client: Client pointer.
- * @qv_info: queue and vector list
+ * @qvlist_info: queue and vector list
*
* Return 0 on success or < 0 on error
**/
@@ -665,7 +641,7 @@ err:
* i40e_client_request_reset
* @ldev: pointer to L2 context.
* @client: Client pointer.
- * @level: reset level
+ * @reset_level: reset level
**/
static void i40e_client_request_reset(struct i40e_info *ldev,
struct i40e_client *client,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index 9d464d40bc17..72994baf4941 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_CLIENT_H_
#define _I40E_CLIENT_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index c0a3dae8a2db..eb2d1530d331 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_type.h"
#include "i40e_adminq.h"
@@ -1695,6 +1671,8 @@ enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
/**
* i40e_set_fc
* @hw: pointer to the hw struct
+ * @aq_failures: buffer to return AdminQ failure information
+ * @atomic_restart: whether to enable atomic link restart
*
* Set the requested flow control mode using set_phy_config.
**/
@@ -2831,8 +2809,8 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
* @mr_list: list of mirrored VSI SEIDs or VLAN IDs
* @cmd_details: pointer to command details structure or NULL
* @rule_id: Rule ID returned from FW
- * @rule_used: Number of rules used in internal switch
- * @rule_free: Number of rules free in internal switch
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
*
* Add/Delete a mirror rule to a specific switch. Mirror rules are supported for
* VEBs/VEPA elements only
@@ -2892,8 +2870,8 @@ static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw,
* @mr_list: list of mirrored VSI SEIDs or VLAN IDs
* @cmd_details: pointer to command details structure or NULL
* @rule_id: Rule ID returned from FW
- * @rule_used: Number of rules used in internal switch
- * @rule_free: Number of rules free in internal switch
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
*
* Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only
**/
@@ -2923,8 +2901,8 @@ i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
* add_mirrorrule.
* @mr_list: list of mirrored VLAN IDs to be removed
* @cmd_details: pointer to command details structure or NULL
- * @rule_used: Number of rules used in internal switch
- * @rule_free: Number of rules free in internal switch
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
*
* Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only
**/
@@ -3672,6 +3650,8 @@ i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
/**
* i40e_aq_start_lldp
* @hw: pointer to the hw struct
+ * @buff: buffer for result
+ * @buff_size: buffer size
* @cmd_details: pointer to command details structure or NULL
*
* Start the embedded LLDP Agent on all ports.
@@ -3752,7 +3732,6 @@ i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
* i40e_aq_add_udp_tunnel
* @hw: pointer to the hw struct
* @udp_port: the UDP port to add in Host byte order
- * @header_len: length of the tunneling header length in DWords
* @protocol_index: protocol index type
* @filter_index: pointer to filter index
* @cmd_details: pointer to command details structure or NULL
@@ -3971,6 +3950,7 @@ i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
* @hw: pointer to the hw struct
* @seid: seid of the switching component connected to Physical Port
* @ets_data: Buffer holding ETS parameters
+ * @opcode: Tx scheduler AQ command opcode
* @cmd_details: pointer to command details structure or NULL
**/
i40e_status i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
@@ -4314,10 +4294,10 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
* @hw: pointer to the hw struct
* @seid: VSI seid to add ethertype filter from
**/
-#define I40E_FLOW_CONTROL_ETHTYPE 0x8808
void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
u16 seid)
{
+#define I40E_FLOW_CONTROL_ETHTYPE 0x8808
u16 flag = I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC |
I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP |
I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX;
@@ -4448,6 +4428,7 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status)
* @ret_buff_size: actual buffer size returned
* @ret_next_table: next block to read
* @ret_next_index: next index to read
+ * @cmd_details: pointer to command details structure or NULL
*
* Dump internal FW/HW data for debug purposes.
*
@@ -4574,7 +4555,7 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
* i40e_read_phy_register_clause22
* @hw: pointer to the HW structure
* @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
* @value: PHY register value
*
* Reads specified PHY register value
@@ -4619,7 +4600,7 @@ i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
* i40e_write_phy_register_clause22
* @hw: pointer to the HW structure
* @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
* @value: PHY register value
*
* Writes specified PHY register value
@@ -4660,7 +4641,7 @@ i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
* @hw: pointer to the HW structure
* @page: registers page number
* @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
* @value: PHY register value
*
* Reads specified PHY register value
@@ -4734,7 +4715,7 @@ phy_read_end:
* @hw: pointer to the HW structure
* @page: registers page number
* @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
* @value: PHY register value
*
* Writes value to specified PHY register
@@ -4801,7 +4782,7 @@ phy_write_end:
* @hw: pointer to the HW structure
* @page: registers page number
* @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
* @value: PHY register value
*
* Writes value to specified PHY register
@@ -4837,7 +4818,7 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
* @hw: pointer to the HW structure
* @page: registers page number
* @reg: register address in the page
- * @phy_adr: PHY address on MDIO interface
+ * @phy_addr: PHY address on MDIO interface
* @value: PHY register value
*
* Reads specified PHY register value
@@ -4872,7 +4853,6 @@ i40e_status i40e_read_phy_register(struct i40e_hw *hw,
* i40e_get_phy_address
* @hw: pointer to the HW structure
* @dev_num: PHY port num that address we want
- * @phy_addr: Returned PHY address
*
* Gets PHY address for current port
**/
@@ -5082,7 +5062,9 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
* i40e_led_set_phy
* @hw: pointer to the HW structure
* @on: true or false
+ * @led_addr: address of led register to use
* @mode: original val plus bit for set or ignore
+ *
* Set led's on or off when controlled by the PHY
*
**/
@@ -5371,6 +5353,7 @@ i40e_status_code i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
* @hw: pointer to the hw struct
* @buff: command buffer (size in bytes = buff_size)
* @buff_size: buffer size in bytes
+ * @flags: AdminQ command flags
* @cmd_details: pointer to command details structure or NULL
**/
enum
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 9fec728dc4b9..56bff8faf371 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_adminq.h"
#include "i40e_prototype.h"
@@ -945,6 +921,70 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw)
}
/**
+ * _i40e_read_lldp_cfg - generic read of LLDP Configuration data from NVM
+ * @hw: pointer to the HW structure
+ * @lldp_cfg: pointer to hold lldp configuration variables
+ * @module: address of the module pointer
+ * @word_offset: offset of LLDP configuration
+ *
+ * Reads the LLDP configuration data from NVM using passed addresses
+ **/
+static i40e_status _i40e_read_lldp_cfg(struct i40e_hw *hw,
+ struct i40e_lldp_variables *lldp_cfg,
+ u8 module, u32 word_offset)
+{
+ u32 address, offset = (2 * word_offset);
+ i40e_status ret;
+ __le16 raw_mem;
+ u16 mem;
+
+ ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+ if (ret)
+ return ret;
+
+ ret = i40e_aq_read_nvm(hw, 0x0, module * 2, sizeof(raw_mem), &raw_mem,
+ true, NULL);
+ i40e_release_nvm(hw);
+ if (ret)
+ return ret;
+
+ mem = le16_to_cpu(raw_mem);
+ /* Check if this pointer needs to be read in word size or 4K sector
+ * units.
+ */
+ if (mem & I40E_PTR_TYPE)
+ address = (0x7FFF & mem) * 4096;
+ else
+ address = (0x7FFF & mem) * 2;
+
+ ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+ if (ret)
+ goto err_lldp_cfg;
+
+ ret = i40e_aq_read_nvm(hw, module, offset, sizeof(raw_mem), &raw_mem,
+ true, NULL);
+ i40e_release_nvm(hw);
+ if (ret)
+ return ret;
+
+ mem = le16_to_cpu(raw_mem);
+ offset = mem + word_offset;
+ offset *= 2;
+
+ ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+ if (ret)
+ goto err_lldp_cfg;
+
+ ret = i40e_aq_read_nvm(hw, 0, address + offset,
+ sizeof(struct i40e_lldp_variables), lldp_cfg,
+ true, NULL);
+ i40e_release_nvm(hw);
+
+err_lldp_cfg:
+ return ret;
+}
+
+/**
* i40e_read_lldp_cfg - read LLDP Configuration data from NVM
* @hw: pointer to the HW structure
* @lldp_cfg: pointer to hold lldp configuration variables
@@ -955,21 +995,34 @@ i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
struct i40e_lldp_variables *lldp_cfg)
{
i40e_status ret = 0;
- u32 offset = (2 * I40E_NVM_LLDP_CFG_PTR);
+ u32 mem;
if (!lldp_cfg)
return I40E_ERR_PARAM;
ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
if (ret)
- goto err_lldp_cfg;
+ return ret;
- ret = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR, offset,
- sizeof(struct i40e_lldp_variables),
- (u8 *)lldp_cfg,
- true, NULL);
+ ret = i40e_aq_read_nvm(hw, I40E_SR_NVM_CONTROL_WORD, 0, sizeof(mem),
+ &mem, true, NULL);
i40e_release_nvm(hw);
+ if (ret)
+ return ret;
+
+ /* Read a bit that holds information whether we are running flat or
+ * structured NVM image. Flat image has LLDP configuration in shadow
+ * ram, so there is a need to pass different addresses for both cases.
+ */
+ if (mem & I40E_SR_NVM_MAP_STRUCTURE_TYPE) {
+ /* Flat NVM case */
+ ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_SR_EMP_MODULE_PTR,
+ I40E_SR_LLDP_CFG_PTR);
+ } else {
+ /* Good old structured NVM image */
+ ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_EMP_MODULE_PTR,
+ I40E_NVM_LLDP_CFG_PTR);
+ }
-err_lldp_cfg:
return ret;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 4f806386cb22..2b748a60a843 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_DCB_H_
#define _I40E_DCB_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 502818e3da78..9deae9a35423 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifdef CONFIG_I40E_DCB
#include "i40e.h"
@@ -47,7 +23,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay)
/**
* i40e_dcbnl_ieee_getets - retrieve local IEEE ETS configuration
- * @netdev: the corresponding netdev
+ * @dev: the corresponding netdev
* @ets: structure to hold the ETS information
*
* Returns local IEEE ETS configuration
@@ -86,8 +62,8 @@ static int i40e_dcbnl_ieee_getets(struct net_device *dev,
/**
* i40e_dcbnl_ieee_getpfc - retrieve local IEEE PFC configuration
- * @netdev: the corresponding netdev
- * @ets: structure to hold the PFC information
+ * @dev: the corresponding netdev
+ * @pfc: structure to hold the PFC information
*
* Returns local IEEE PFC configuration
**/
@@ -119,7 +95,7 @@ static int i40e_dcbnl_ieee_getpfc(struct net_device *dev,
/**
* i40e_dcbnl_getdcbx - retrieve current DCBx capability
- * @netdev: the corresponding netdev
+ * @dev: the corresponding netdev
*
* Returns DCBx capability features
**/
@@ -132,7 +108,8 @@ static u8 i40e_dcbnl_getdcbx(struct net_device *dev)
/**
* i40e_dcbnl_get_perm_hw_addr - MAC address used by DCBx
- * @netdev: the corresponding netdev
+ * @dev: the corresponding netdev
+ * @perm_addr: buffer to store the MAC address
*
* Returns the SAN MAC address used for LLDP exchange
**/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index d494dcaf18d0..56b911a5dd8b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifdef CONFIG_DEBUG_FS
@@ -36,8 +12,8 @@ static struct dentry *i40e_dbg_root;
/**
* i40e_dbg_find_vsi - searches for the vsi with the given seid
- * @pf - the PF structure to search for the vsi
- * @seid - seid of the vsi it is searching for
+ * @pf: the PF structure to search for the vsi
+ * @seid: seid of the vsi it is searching for
**/
static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
{
@@ -55,8 +31,8 @@ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
/**
* i40e_dbg_find_veb - searches for the veb with the given seid
- * @pf - the PF structure to search for the veb
- * @seid - seid of the veb it is searching for
+ * @pf: the PF structure to search for the veb
+ * @seid: seid of the veb it is searching for
**/
static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid)
{
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index ad6a66ccb576..334b05ff685a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_DEVIDS_H_
#define _I40E_DEVIDS_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
index df3e60470f8b..ef4d3762bf37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_diag.h"
#include "i40e_prototype.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
index be8341763475..c3340f320a18 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_DIAG_H_
#define _I40E_DIAG_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index b974482ff630..fc6a5eef141c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
/* ethtool support for i40e */
@@ -977,7 +953,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
ethtool_link_ksettings_test_link_mode(ks, advertising,
10000baseCR_Full) ||
ethtool_link_ksettings_test_link_mode(ks, advertising,
- 10000baseSR_Full))
+ 10000baseSR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseLR_Full))
config.link_speed |= I40E_LINK_SPEED_10GB;
if (ethtool_link_ksettings_test_link_mode(ks, advertising,
20000baseKR2_Full))
@@ -1079,6 +1057,9 @@ static int i40e_nway_reset(struct net_device *netdev)
/**
* i40e_get_pauseparam - Get Flow Control status
+ * @netdev: netdevice structure
+ * @pause: buffer to return pause parameters
+ *
* Return tx/rx-pause status
**/
static void i40e_get_pauseparam(struct net_device *netdev,
@@ -2550,7 +2531,7 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
/**
* i40e_check_mask - Check whether a mask field is set
* @mask: the full mask value
- * @field; mask of the field to check
+ * @field: mask of the field to check
*
* If the given mask is fully set, return positive value. If the mask for the
* field is fully unset, return zero. Otherwise return a negative error code.
@@ -2621,6 +2602,7 @@ static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
/**
* i40e_fill_rx_flow_user_data - Fill in user-defined data field
* @fsp: pointer to rx_flow specification
+ * @data: pointer to return userdef data
*
* Reads the userdef data structure and properly fills in the user defined
* fields of the rx_flow_spec.
@@ -2799,6 +2781,7 @@ no_input_set:
* i40e_get_rxnfc - command to get RX flow classification rules
* @netdev: network interface device structure
* @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule data
*
* Returns Success if the command is supported.
**/
@@ -2840,7 +2823,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
/**
* i40e_get_rss_hash_bits - Read RSS Hash bits from register
* @nfc: pointer to user request
- * @i_setc bits currently set
+ * @i_setc: bits currently set
*
* Returns value of bits to be set per user request
**/
@@ -2885,7 +2868,7 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
/**
* i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
* @pf: pointer to the physical function struct
- * @cmd: ethtool rxnfc command
+ * @nfc: ethtool rxnfc command
*
* Returns Success if the flow input set is supported.
**/
@@ -3284,7 +3267,7 @@ static int i40e_add_flex_offset(struct list_head *flex_pit_list,
* __i40e_reprogram_flex_pit - Re-program specific FLX_PIT table
* @pf: Pointer to the PF structure
* @flex_pit_list: list of flexible src offsets in use
- * #flex_pit_start: index to first entry for this section of the table
+ * @flex_pit_start: index to first entry for this section of the table
*
* In order to handle flexible data, the hardware uses a table of values
* called the FLX_PIT table. This table is used to indicate which sections of
@@ -3398,7 +3381,7 @@ static void i40e_reprogram_flex_pit(struct i40e_pf *pf)
/**
* i40e_flow_str - Converts a flow_type into a human readable string
- * @flow_type: the flow type from a flow specification
+ * @fsp: the flow specification
*
* Currently only flow types we support are included here, and the string
* value attempts to match what ethtool would use to configure this flow type.
@@ -4103,7 +4086,7 @@ static unsigned int i40e_max_channels(struct i40e_vsi *vsi)
/**
* i40e_get_channels - Get the current channels enabled and max supported etc.
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ch: ethtool channels structure
*
* We don't support separate tx and rx queues as channels. The other count
@@ -4112,7 +4095,7 @@ static unsigned int i40e_max_channels(struct i40e_vsi *vsi)
* q_vectors since we support a lot more queue pairs than q_vectors.
**/
static void i40e_get_channels(struct net_device *dev,
- struct ethtool_channels *ch)
+ struct ethtool_channels *ch)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
@@ -4131,14 +4114,14 @@ static void i40e_get_channels(struct net_device *dev,
/**
* i40e_set_channels - Set the new channels count.
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ch: ethtool channels structure
*
* The new channels count may not be the same as requested by the user
* since it gets rounded down to a power of 2 value.
**/
static int i40e_set_channels(struct net_device *dev,
- struct ethtool_channels *ch)
+ struct ethtool_channels *ch)
{
const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
struct i40e_netdev_priv *np = netdev_priv(dev);
@@ -4273,6 +4256,7 @@ out:
* @netdev: network interface device structure
* @indir: indirection table
* @key: hash key
+ * @hfunc: hash function to use
*
* Returns -EINVAL if the table specifies an invalid queue id, otherwise
* returns 0 after programming the table.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index 6d4b590f851b..19ce93d7fd0a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_osdep.h"
#include "i40e_register.h"
@@ -198,7 +174,6 @@ exit:
* @hw: pointer to our HW structure
* @hmc_info: pointer to the HMC configuration information structure
* @idx: the page index
- * @is_pf: distinguishes a VF from a PF
*
* This function:
* 1. Marks the entry in pd tabe (for paged address mode) or in sd table
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
index 7b5fd33d70ae..1c78de838857 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_HMC_H_
#define _I40E_HMC_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index cd40dc487b38..994011c38fb4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_osdep.h"
#include "i40e_register.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
index 79e1396735d9..c46a2c449e60 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_LAN_HMC_H_
#define _I40E_LAN_HMC_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 16229998fb1e..c8659fbd7111 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/etherdevice.h>
#include <linux/of_net.h>
@@ -278,8 +254,8 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
/**
* i40e_find_vsi_from_id - searches for the vsi with the given id
- * @pf - the pf structure to search for the vsi
- * @id - id of the vsi it is searching for
+ * @pf: the pf structure to search for the vsi
+ * @id: id of the vsi it is searching for
**/
struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
{
@@ -435,6 +411,7 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring,
/**
* i40e_get_netdev_stats_struct - Get statistics for netdev interface
* @netdev: network interface device structure
+ * @stats: data structure to store statistics
*
* Returns the address of the device statistics structure.
* The statistics are actually updated from the service task.
@@ -2027,7 +2004,7 @@ struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
* from firmware
* @count: Number of filters added
* @add_list: return data from fw
- * @head: pointer to first filter in current batch
+ * @add_head: pointer to first filter in current batch
*
* MAC filter entries from list were slated to be added to device. Returns
* number of successful filters. Note that 0 does NOT mean success!
@@ -2134,6 +2111,7 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
/**
* i40e_aqc_broadcast_filter - Set promiscuous broadcast flags
* @vsi: pointer to the VSI
+ * @vsi_name: the VSI name
* @f: filter data
*
* This function sets or clears the promiscuous broadcast flags for VLAN
@@ -2840,6 +2818,7 @@ void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
/**
* i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
* @netdev: network interface to be adjusted
+ * @proto: unused protocol value
* @vid: vlan id to be added
*
* net_device_ops implementation for adding vlan ids
@@ -2864,6 +2843,7 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
/**
* i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
* @netdev: network interface to be adjusted
+ * @proto: unused protocol value
* @vid: vlan id to be removed
*
* net_device_ops implementation for removing vlan ids
@@ -3485,7 +3465,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
/**
* i40e_enable_misc_int_causes - enable the non-queue interrupts
- * @hw: ptr to the hardware info
+ * @pf: pointer to private device data structure
**/
static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
{
@@ -4255,8 +4235,8 @@ static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable)
* @is_xdp: true if the queue is used for XDP
* @enable: start or stop the queue
**/
-static int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
- bool is_xdp, bool enable)
+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
+ bool is_xdp, bool enable)
{
int ret;
@@ -4301,7 +4281,6 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
if (ret)
break;
}
-
return ret;
}
@@ -4340,9 +4319,9 @@ static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable)
* @pf_q: the PF queue to configure
* @enable: start or stop the queue
*
- * This function enables or disables a single queue. Note that any delay
- * required after the operation is expected to be handled by the caller of
- * this function.
+ * This function enables or disables a single queue. Note that
+ * any delay required after the operation is expected to be
+ * handled by the caller of this function.
**/
static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
{
@@ -4372,6 +4351,30 @@ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
}
/**
+ * i40e_control_wait_rx_q
+ * @pf: the PF structure
+ * @pf_q: queue being configured
+ * @enable: start or stop the rings
+ *
+ * This function enables or disables a single queue along with waiting
+ * for the change to finish. The caller of this function should handle
+ * the delays needed in the case of disabling queues.
+ **/
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
+{
+ int ret = 0;
+
+ i40e_control_rx_q(pf, pf_q, enable);
+
+ /* wait for the change to finish */
+ ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
+/**
* i40e_vsi_control_rx - Start or stop a VSI's rings
* @vsi: the VSI being configured
* @enable: start or stop the rings
@@ -4383,10 +4386,7 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
pf_q = vsi->base_queue;
for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
- i40e_control_rx_q(pf, pf_q, enable);
-
- /* wait for the change to finish */
- ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+ ret = i40e_control_wait_rx_q(pf, pf_q, enable);
if (ret) {
dev_info(&pf->pdev->dev,
"VSI seid %d Rx ring %d %sable timeout\n",
@@ -5096,7 +5096,7 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
* i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
* @vsi: the VSI being configured
* @enabled_tc: TC bitmap
- * @bw_credits: BW shared credits per TC
+ * @bw_share: BW shared credits per TC
*
* Returns 0 on success, negative value on failure
**/
@@ -6353,6 +6353,7 @@ out:
/**
* i40e_print_link_message - print link up or down
* @vsi: the VSI for which link needs a message
+ * @isup: true of link is up, false otherwise
*/
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
{
@@ -7212,8 +7213,7 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
if (mask->dst == cpu_to_be32(0xffffffff)) {
field_flags |= I40E_CLOUD_FIELD_IIP;
} else {
- mask->dst = be32_to_cpu(mask->dst);
- dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n",
+ dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
&mask->dst);
return I40E_ERR_CONFIG;
}
@@ -7223,8 +7223,7 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
if (mask->src == cpu_to_be32(0xffffffff)) {
field_flags |= I40E_CLOUD_FIELD_IIP;
} else {
- mask->src = be32_to_cpu(mask->src);
- dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n",
+ dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
&mask->src);
return I40E_ERR_CONFIG;
}
@@ -9691,9 +9690,9 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
i40e_flush(hw);
}
-static const char *i40e_tunnel_name(struct i40e_udp_port_config *port)
+static const char *i40e_tunnel_name(u8 type)
{
- switch (port->type) {
+ switch (type) {
case UDP_TUNNEL_TYPE_VXLAN:
return "vxlan";
case UDP_TUNNEL_TYPE_GENEVE:
@@ -9727,37 +9726,68 @@ static void i40e_sync_udp_filters(struct i40e_pf *pf)
static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
- i40e_status ret;
+ u8 filter_index, type;
u16 port;
int i;
if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state))
return;
+ /* acquire RTNL to maintain state of flags and port requests */
+ rtnl_lock();
+
for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
if (pf->pending_udp_bitmap & BIT_ULL(i)) {
+ struct i40e_udp_port_config *udp_port;
+ i40e_status ret = 0;
+
+ udp_port = &pf->udp_ports[i];
pf->pending_udp_bitmap &= ~BIT_ULL(i);
- port = pf->udp_ports[i].port;
+
+ port = READ_ONCE(udp_port->port);
+ type = READ_ONCE(udp_port->type);
+ filter_index = READ_ONCE(udp_port->filter_index);
+
+ /* release RTNL while we wait on AQ command */
+ rtnl_unlock();
+
if (port)
ret = i40e_aq_add_udp_tunnel(hw, port,
- pf->udp_ports[i].type,
- NULL, NULL);
- else
- ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
+ type,
+ &filter_index,
+ NULL);
+ else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED)
+ ret = i40e_aq_del_udp_tunnel(hw, filter_index,
+ NULL);
+
+ /* reacquire RTNL so we can update filter_index */
+ rtnl_lock();
if (ret) {
dev_info(&pf->pdev->dev,
"%s %s port %d, index %d failed, err %s aq_err %s\n",
- i40e_tunnel_name(&pf->udp_ports[i]),
+ i40e_tunnel_name(type),
port ? "add" : "delete",
- port, i,
+ port,
+ filter_index,
i40e_stat_str(&pf->hw, ret),
i40e_aq_str(&pf->hw,
pf->hw.aq.asq_last_status));
- pf->udp_ports[i].port = 0;
+ if (port) {
+ /* failed to add, just reset port,
+ * drop pending bit for any deletion
+ */
+ udp_port->port = 0;
+ pf->pending_udp_bitmap &= ~BIT_ULL(i);
+ }
+ } else if (port) {
+ /* record filter index on success */
+ udp_port->filter_index = filter_index;
}
}
}
+
+ rtnl_unlock();
}
/**
@@ -10004,7 +10034,7 @@ unlock_pf:
/**
* i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI
- * @type: VSI pointer
+ * @vsi: VSI pointer
* @free_qvectors: a bool to specify if q_vectors need to be freed.
*
* On error: returns error code (negative)
@@ -10800,7 +10830,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
* @vsi: Pointer to VSI structure
* @seed: Buffer to store the keys
* @lut: Buffer to store the lookup table entries
- * lut_size: Size of buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
*
* Returns 0 on success, negative on failure
*/
@@ -11374,6 +11404,11 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port)
u8 i;
for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
+ /* Do not report ports with pending deletions as
+ * being available.
+ */
+ if (!port && (pf->pending_udp_bitmap & BIT_ULL(i)))
+ continue;
if (pf->udp_ports[i].port == port)
return i;
}
@@ -11428,6 +11463,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
/* New port: add it and mark its index in the bitmap */
pf->udp_ports[next_idx].port = port;
+ pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED;
pf->pending_udp_bitmap |= BIT_ULL(next_idx);
set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
}
@@ -11469,7 +11505,12 @@ static void i40e_udp_tunnel_del(struct net_device *netdev,
* and make it pending
*/
pf->udp_ports[idx].port = 0;
- pf->pending_udp_bitmap |= BIT_ULL(idx);
+
+ /* Toggle pending bit instead of setting it. This way if we are
+ * deleting a port that has yet to be added we just clear the pending
+ * bit and don't have to worry about it.
+ */
+ pf->pending_udp_bitmap ^= BIT_ULL(idx);
set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
return;
@@ -11500,6 +11541,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
* @tb: pointer to array of nladdr (unused)
* @dev: the net device pointer
* @addr: the MAC address entry being added
+ * @vid: VLAN ID
* @flags: instructions from stack about fdb operation
*/
static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
@@ -11545,6 +11587,7 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
* i40e_ndo_bridge_setlink - Set the hardware bridge mode
* @dev: the netdev being configured
* @nlh: RTNL message
+ * @flags: bridge flags
*
* Inserts a new hardware bridge if not already created and
* enables the bridging mode requested (VEB or VEPA). If the
@@ -14118,6 +14161,7 @@ static void i40e_remove(struct pci_dev *pdev)
/**
* i40e_pci_error_detected - warning that something funky happened in PCI land
* @pdev: PCI device information struct
+ * @error: the type of PCI error
*
* Called to warn that something happened and the error handling steps
* are in progress. Allows the driver to quiesce things, be ready for
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index ba9687c03795..0299e5bbb902 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_prototype.h"
@@ -1173,6 +1149,7 @@ void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw)
* i40e_nvmupd_check_wait_event - handle NVM update operation events
* @hw: pointer to the hardware structure
* @opcode: the event that just happened
+ * @desc: AdminQ descriptor
**/
void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
struct i40e_aq_desc *desc)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index 9c3c3b0d3ac4..a07574bff550 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_OSDEP_H_
#define _I40E_OSDEP_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 2ec24188d6e2..3170655cdeb9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_PROTOTYPE_H_
#define _I40E_PROTOTYPE_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 5b47dd1f75a5..aa3daec2049d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e.h"
#include <linux/ptp_classify.h>
@@ -40,9 +16,9 @@
* At 1Gb link, the period is multiplied by 20. (32ns)
* 1588 functionality is not supported at 100Mbps.
*/
-#define I40E_PTP_40GB_INCVAL 0x0199999999ULL
-#define I40E_PTP_10GB_INCVAL 0x0333333333ULL
-#define I40E_PTP_1GB_INCVAL 0x2000000000ULL
+#define I40E_PTP_40GB_INCVAL 0x0199999999ULL
+#define I40E_PTP_10GB_INCVAL_MULT 2
+#define I40E_PTP_1GB_INCVAL_MULT 20
#define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
#define I40E_PRTTSYN_CTL1_TSYNTYPE_V2 (2 << \
@@ -130,17 +106,24 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
ppb = -ppb;
}
- smp_mb(); /* Force any pending update before accessing. */
- adj = READ_ONCE(pf->ptp_base_adj);
-
- freq = adj;
+ freq = I40E_PTP_40GB_INCVAL;
freq *= ppb;
diff = div_u64(freq, 1000000000ULL);
if (neg_adj)
- adj -= diff;
+ adj = I40E_PTP_40GB_INCVAL - diff;
else
- adj += diff;
+ adj = I40E_PTP_40GB_INCVAL + diff;
+
+ /* At some link speeds, the base incval is so large that directly
+ * multiplying by ppb would result in arithmetic overflow even when
+ * using a u64. Avoid this by instead calculating the new incval
+ * always in terms of the 40GbE clock rate and then multiplying by the
+ * link speed factor afterwards. This does result in slightly lower
+ * precision at lower link speeds, but it is fairly minor.
+ */
+ smp_mb(); /* Force any pending update before accessing. */
+ adj *= READ_ONCE(pf->ptp_adj_mult);
wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF);
wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32);
@@ -462,6 +445,7 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
struct i40e_link_status *hw_link_info;
struct i40e_hw *hw = &pf->hw;
u64 incval;
+ u32 mult;
hw_link_info = &hw->phy.link_info;
@@ -469,10 +453,10 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
switch (hw_link_info->link_speed) {
case I40E_LINK_SPEED_10GB:
- incval = I40E_PTP_10GB_INCVAL;
+ mult = I40E_PTP_10GB_INCVAL_MULT;
break;
case I40E_LINK_SPEED_1GB:
- incval = I40E_PTP_1GB_INCVAL;
+ mult = I40E_PTP_1GB_INCVAL_MULT;
break;
case I40E_LINK_SPEED_100MB:
{
@@ -483,15 +467,20 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
"1588 functionality is not supported at 100 Mbps. Stopping the PHC.\n");
warn_once++;
}
- incval = 0;
+ mult = 0;
break;
}
case I40E_LINK_SPEED_40GB:
default:
- incval = I40E_PTP_40GB_INCVAL;
+ mult = 1;
break;
}
+ /* The increment value is calculated by taking the base 40GbE incvalue
+ * and multiplying it by a factor based on the link speed.
+ */
+ incval = I40E_PTP_40GB_INCVAL * mult;
+
/* Write the new increment value into the increment register. The
* hardware will not update the clock until both registers have been
* written.
@@ -500,14 +489,14 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
wr32(hw, I40E_PRTTSYN_INC_H, incval >> 32);
/* Update the base adjustement value. */
- WRITE_ONCE(pf->ptp_base_adj, incval);
+ WRITE_ONCE(pf->ptp_adj_mult, mult);
smp_mb(); /* Force the above update. */
}
/**
* i40e_ptp_get_ts_config - ioctl interface to read the HW timestamping
* @pf: Board private structure
- * @ifreq: ioctl data
+ * @ifr: ioctl data
*
* Obtain the current hardware timestamping settigs as requested. To do this,
* keep a shadow copy of the timestamp settings rather than attempting to
@@ -651,7 +640,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
/**
* i40e_ptp_set_ts_config - ioctl interface to control the HW timestamping
* @pf: Board private structure
- * @ifreq: ioctl data
+ * @ifr: ioctl data
*
* Respond to the user filter requests and make the appropriate hardware
* changes here. The XL710 cannot support splitting of the Tx/Rx timestamping
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index b3e206e49cc2..52e3680c57f8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_REGISTER_H_
#define _I40E_REGISTER_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h
index 10c86f63dc52..77be0702d07c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_status.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_STATUS_H_
#define _I40E_STATUS_H_
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index 410ba13bcf21..424f02077e2e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel(R) 40-10 Gigabit Ethernet Connection Network Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
/* Modeled on trace-events-sample.h */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f174c72480ab..5efa68de935b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/prefetch.h>
#include <net/busy_poll.h>
@@ -495,7 +471,7 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
/**
* i40e_add_del_fdir - Build raw packets to add/del fdir filter
* @vsi: pointer to the targeted VSI
- * @cmd: command to get or set RX flow classification rules
+ * @input: filter to add or delete
* @add: true adds a filter, false removes it
*
**/
@@ -638,7 +614,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
kfree(tx_buffer->raw_buf);
else if (ring_is_xdp(ring))
- page_frag_free(tx_buffer->raw_buf);
+ xdp_return_frame(tx_buffer->xdpf);
else
dev_kfree_skb_any(tx_buffer->skb);
if (dma_unmap_len(tx_buffer, len))
@@ -713,7 +689,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
- * @tx_ring: the ring of descriptors
+ * @ring: the ring of descriptors
* @in_sw: use SW variables
*
* Since there is no access to the ring head register
@@ -841,7 +817,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
/* free the skb/XDP data */
if (ring_is_xdp(tx_ring))
- page_frag_free(tx_buf->raw_buf);
+ xdp_return_frame(tx_buf->xdpf);
else
napi_consume_skb(tx_buf->skb, napi_budget);
@@ -1795,6 +1771,8 @@ static inline int i40e_ptype_to_htype(u8 ptype)
* i40e_rx_hash - set the hash value in the skb
* @ring: descriptor ring
* @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
**/
static inline void i40e_rx_hash(struct i40e_ring *ring,
union i40e_rx_desc *rx_desc,
@@ -2203,9 +2181,20 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
#define I40E_XDP_CONSUMED 1
#define I40E_XDP_TX 2
-static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
struct i40e_ring *xdp_ring);
+static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
+ struct i40e_ring *xdp_ring)
+{
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+ if (unlikely(!xdpf))
+ return I40E_XDP_CONSUMED;
+
+ return i40e_xmit_xdp_ring(xdpf, xdp_ring);
+}
+
/**
* i40e_run_xdp - run an XDP program
* @rx_ring: Rx ring being processed
@@ -2225,13 +2214,15 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
if (!xdp_prog)
goto xdp_out;
+ prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
- result = i40e_xmit_xdp_ring(xdp, xdp_ring);
+ result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
break;
case XDP_REDIRECT:
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
@@ -3478,13 +3469,13 @@ dma_error:
* @xdp: data to transmit
* @xdp_ring: XDP Tx ring
**/
-static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
struct i40e_ring *xdp_ring)
{
- u32 size = xdp->data_end - xdp->data;
u16 i = xdp_ring->next_to_use;
struct i40e_tx_buffer *tx_bi;
struct i40e_tx_desc *tx_desc;
+ u32 size = xdpf->len;
dma_addr_t dma;
if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
@@ -3492,14 +3483,14 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
return I40E_XDP_CONSUMED;
}
- dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE);
+ dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
if (dma_mapping_error(xdp_ring->dev, dma))
return I40E_XDP_CONSUMED;
tx_bi = &xdp_ring->tx_bi[i];
tx_bi->bytecount = size;
tx_bi->gso_segs = 1;
- tx_bi->raw_buf = xdp->data;
+ tx_bi->xdpf = xdpf;
/* record length, and DMA address */
dma_unmap_len_set(tx_bi, len, size);
@@ -3675,7 +3666,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
*
* Returns Zero if sent, else an error code
**/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
@@ -3688,7 +3679,7 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO;
- err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]);
+ err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
if (err != I40E_XDP_TX)
return -ENOSPC;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 3043483ec426..fdd2c55f03a6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_TXRX_H_
#define _I40E_TXRX_H_
@@ -306,6 +282,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
struct i40e_tx_buffer {
struct i40e_tx_desc *next_to_watch;
union {
+ struct xdp_frame *xdpf;
struct sk_buff *skb;
void *raw_buf;
};
@@ -510,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
void i40e_xdp_flush(struct net_device *dev);
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index bfb80092b352..7df969c59855 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_TYPE_H_
#define _I40E_TYPE_H_
@@ -1318,7 +1294,8 @@ struct i40e_hw_port_stats {
/* Checksum and Shadow RAM pointers */
#define I40E_SR_NVM_CONTROL_WORD 0x00
-#define I40E_SR_EMP_MODULE_PTR 0x0F
+#define I40E_EMP_MODULE_PTR 0x0F
+#define I40E_SR_EMP_MODULE_PTR 0x48
#define I40E_SR_PBA_FLAGS 0x15
#define I40E_SR_PBA_BLOCK_PTR 0x16
#define I40E_SR_BOOT_CONFIG_PTR 0x17
@@ -1337,6 +1314,8 @@ struct i40e_hw_port_stats {
#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024
#define I40E_SR_CONTROL_WORD_1_SHIFT 0x06
#define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID BIT(5)
+#define I40E_SR_NVM_MAP_STRUCTURE_TYPE BIT(12)
#define I40E_PTR_TYPE BIT(15)
#define I40E_SR_OCP_CFG_WORD0 0x2B
#define I40E_SR_OCP_ENABLED BIT(15)
@@ -1454,7 +1433,8 @@ enum i40e_reset_type {
};
/* IEEE 802.1AB LLDP Agent Variables from NVM */
-#define I40E_NVM_LLDP_CFG_PTR 0xD
+#define I40E_NVM_LLDP_CFG_PTR 0x06
+#define I40E_SR_LLDP_CFG_PTR 0x31
struct i40e_lldp_variables {
u16 length;
u16 adminstatus;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 35173cbe80f7..c6d24eaede18 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e.h"
@@ -32,8 +8,8 @@
/**
* i40e_vc_vf_broadcast
* @pf: pointer to the PF structure
- * @opcode: operation code
- * @retval: return value
+ * @v_opcode: operation code
+ * @v_retval: return value
* @msg: pointer to the msg buffer
* @msglen: msg length
*
@@ -1663,6 +1639,7 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
/**
* i40e_vc_get_version_msg
* @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
*
* called from the VF to request the API version used by the PF
**/
@@ -1706,7 +1683,6 @@ static void i40e_del_qch(struct i40e_vf *vf)
* i40e_vc_get_vf_resources_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
- * @msglen: msg length
*
* called from the VF to request its resources
**/
@@ -1830,8 +1806,6 @@ err:
/**
* i40e_vc_reset_vf_msg
* @vf: pointer to the VF info
- * @msg: pointer to the msg buffer
- * @msglen: msg length
*
* called from the VF to reset itself,
* unlike other virtchnl messages, PF driver
@@ -2180,6 +2154,51 @@ error_param:
}
/**
+ * i40e_ctrl_vf_tx_rings
+ * @vsi: the SRIOV VSI being configured
+ * @q_map: bit map of the queues to be enabled
+ * @enable: start or stop the queue
+ **/
+static int i40e_ctrl_vf_tx_rings(struct i40e_vsi *vsi, unsigned long q_map,
+ bool enable)
+{
+ struct i40e_pf *pf = vsi->back;
+ int ret = 0;
+ u16 q_id;
+
+ for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) {
+ ret = i40e_control_wait_tx_q(vsi->seid, pf,
+ vsi->base_queue + q_id,
+ false /*is xdp*/, enable);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+/**
+ * i40e_ctrl_vf_rx_rings
+ * @vsi: the SRIOV VSI being configured
+ * @q_map: bit map of the queues to be enabled
+ * @enable: start or stop the queue
+ **/
+static int i40e_ctrl_vf_rx_rings(struct i40e_vsi *vsi, unsigned long q_map,
+ bool enable)
+{
+ struct i40e_pf *pf = vsi->back;
+ int ret = 0;
+ u16 q_id;
+
+ for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) {
+ ret = i40e_control_wait_rx_q(pf, vsi->base_queue + q_id,
+ enable);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+/**
* i40e_vc_enable_queues_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2211,8 +2230,17 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
goto error_param;
}
- if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
+ /* Use the queue bit map sent by the VF */
+ if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues,
+ true)) {
aq_ret = I40E_ERR_TIMEOUT;
+ goto error_param;
+ }
+ if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues,
+ true)) {
+ aq_ret = I40E_ERR_TIMEOUT;
+ goto error_param;
+ }
/* need to start the rings for additional ADq VSI's as well */
if (vf->adq_enabled) {
@@ -2260,8 +2288,17 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
goto error_param;
}
- i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
-
+ /* Use the queue bit map sent by the VF */
+ if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues,
+ false)) {
+ aq_ret = I40E_ERR_TIMEOUT;
+ goto error_param;
+ }
+ if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues,
+ false)) {
+ aq_ret = I40E_ERR_TIMEOUT;
+ goto error_param;
+ }
error_param:
/* send the response to the VF */
return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES,
@@ -3556,15 +3593,16 @@ err:
* i40e_vc_process_vf_msg
* @pf: pointer to the PF structure
* @vf_id: source VF id
+ * @v_opcode: operation code
+ * @v_retval: unused return value code
* @msg: pointer to the msg buffer
* @msglen: msg length
- * @msghndl: msg handle
*
* called from the common aeq/arq handler to
* process request from VF
**/
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
- u32 v_retval, u8 *msg, u16 msglen)
+ u32 __always_unused v_retval, u8 *msg, u16 msglen)
{
struct i40e_hw *hw = &pf->hw;
int local_vf_id = vf_id - (s16)hw->func_caps.vf_base_id;
@@ -4015,7 +4053,8 @@ error_pvid:
* i40e_ndo_set_vf_bw
* @netdev: network interface device structure
* @vf_id: VF identifier
- * @tx_rate: Tx rate
+ * @min_tx_rate: Minimum Tx rate
+ * @max_tx_rate: Maximum Tx rate
*
* configure VF Tx rate
**/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 57f727bb9e36..bf67d62e2b5f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_VIRTCHNL_PF_H_
#define _I40E_VIRTCHNL_PF_H_
diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile
index 1e89c5487676..3c5c6e962280 100644
--- a/drivers/net/ethernet/intel/i40evf/Makefile
+++ b/drivers/net/ethernet/intel/i40evf/Makefile
@@ -1,29 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
-# Copyright(c) 2013 - 2014 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
+# Copyright(c) 2013 - 2018 Intel Corporation.
#
## Makefile for the Intel(R) 40GbE VF driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index 6fd677efa9da..c355120dfdfd 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_status.h"
#include "i40e_type.h"
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
index a7137c165256..1f264b9b6805 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_ADMINQ_H_
#define _I40E_ADMINQ_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 439e71882049..aa81e87cd471 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_ADMINQ_CMD_H_
#define _I40E_ADMINQ_CMD_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
index 7e0fddd8af36..cb8689222c8b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_ALLOC_H_
#define _I40E_ALLOC_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 67140cdbcd7a..9cef54971312 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40e_type.h"
#include "i40e_adminq.h"
@@ -1255,6 +1231,7 @@ i40e_status_code i40evf_aq_write_ddp(struct i40e_hw *hw, void *buff,
* @hw: pointer to the hw struct
* @buff: command buffer (size in bytes = buff_size)
* @buff_size: buffer size in bytes
+ * @flags: AdminQ command flags
* @cmd_details: pointer to command details structure or NULL
**/
enum
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index 352dd3f3eb6a..f300bf271824 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_DEVIDS_H_
#define _I40E_DEVIDS_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
index 7432596164f4..1c78de838857 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_HMC_H_
#define _I40E_HMC_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
index ddac0e4908d3..82b00f70a632 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_LAN_HMC_H_
#define _I40E_LAN_HMC_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index 8668ad6c1a65..3ddddb46455b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_OSDEP_H_
#define _I40E_OSDEP_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 72501bd0f1a9..a358f4b9d5aa 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_PROTOTYPE_H_
#define _I40E_PROTOTYPE_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h
index c9c935659758..49e1f57d99cc 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_register.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_REGISTER_H_
#define _I40E_REGISTER_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
index 0d7993ecb99a..77be0702d07c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_STATUS_H_
#define _I40E_STATUS_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_trace.h b/drivers/net/ethernet/intel/i40evf/i40e_trace.h
index ece01dd12a3c..d7a4e68820a8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_trace.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel(R) 40-10 Gigabit Ethernet Virtual Function Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
/* Modeled on trace-events-sample.h */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 12bd937861e7..a9730711e257 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/prefetch.h>
#include <net/busy_poll.h>
@@ -129,7 +105,7 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40evf_get_tx_pending - how many Tx descriptors not processed
- * @tx_ring: the ring of descriptors
+ * @ring: the ring of descriptors
* @in_sw: is tx_pending being checked in SW or HW
*
* Since there is no access to the ring head register
@@ -1070,6 +1046,8 @@ static inline int i40e_ptype_to_htype(u8 ptype)
* i40e_rx_hash - set the hash value in the skb
* @ring: descriptor ring
* @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
**/
static inline void i40e_rx_hash(struct i40e_ring *ring,
union i40e_rx_desc *rx_desc,
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 5790897eae2e..3b5a63b3236e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_TXRX_H_
#define _I40E_TXRX_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 449de4b0058e..094387db3c11 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40E_TYPE_H_
#define _I40E_TYPE_H_
@@ -1257,7 +1233,8 @@ struct i40e_hw_port_stats {
/* Checksum and Shadow RAM pointers */
#define I40E_SR_NVM_CONTROL_WORD 0x00
-#define I40E_SR_EMP_MODULE_PTR 0x0F
+#define I40E_EMP_MODULE_PTR 0x0F
+#define I40E_SR_EMP_MODULE_PTR 0x48
#define I40E_NVM_OEM_VER_OFF 0x83
#define I40E_SR_NVM_DEV_STARTER_VERSION 0x18
#define I40E_SR_NVM_WAKE_ON_LAN 0x19
@@ -1273,6 +1250,9 @@ struct i40e_hw_port_stats {
#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024
#define I40E_SR_CONTROL_WORD_1_SHIFT 0x06
#define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID BIT(5)
+#define I40E_SR_NVM_MAP_STRUCTURE_TYPE BIT(12)
+#define I40E_PTR_TYPE BIT(15)
/* Shadow RAM related */
#define I40E_SR_SECTOR_SIZE_IN_WORDS 0x800
@@ -1386,6 +1366,10 @@ enum i40e_reset_type {
I40E_RESET_EMPR = 3,
};
+/* IEEE 802.1AB LLDP Agent Variables from NVM */
+#define I40E_NVM_LLDP_CFG_PTR 0x06
+#define I40E_SR_LLDP_CFG_PTR 0x31
+
/* RSS Hash Table Size */
#define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 3a7a1e77bf39..98b834932dd3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#ifndef _I40EVF_H_
#define _I40EVF_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.c b/drivers/net/ethernet/intel/i40evf/i40evf_client.c
index da60ce12b33d..3cc9d60d0d72 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_client.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_client.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
#include <linux/list.h>
#include <linux/errno.h>
@@ -176,7 +178,6 @@ void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset)
/**
* i40evf_client_add_instance - add a client instance to the instance list
* @adapter: pointer to the board struct
- * @client: pointer to a client struct in the client list.
*
* Returns cinst ptr on success, NULL on failure
**/
@@ -234,7 +235,6 @@ out:
/**
* i40evf_client_del_instance - removes a client instance from the list
* @adapter: pointer to the board struct
- * @client: pointer to the client struct
*
**/
static
@@ -438,7 +438,7 @@ static u32 i40evf_client_virtchnl_send(struct i40e_info *ldev,
* i40evf_client_setup_qvlist - send a message to the PF to setup iwarp qv map
* @ldev: pointer to L2 context.
* @client: Client pointer.
- * @qv_info: queue and vector list
+ * @qvlist_info: queue and vector list
*
* Return 0 on success or < 0 on error
**/
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_client.h b/drivers/net/ethernet/intel/i40evf/i40evf_client.h
index 15a10da5bd4a..fc6592c3de9c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_client.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_client.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
#ifndef _I40E_CLIENT_H_
#define _I40E_CLIENT_H_
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index dc4cde274fb8..69efe0aec76a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
/* ethtool support for i40evf */
#include "i40evf.h"
@@ -226,7 +202,7 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
/**
* i40evf_get_priv_flags - report device private flags
- * @dev: network interface device structure
+ * @netdev: network interface device structure
*
* The get string set count and the string set should be matched for each
* flag returned. Add new strings for each flag to the i40e_gstrings_priv_flags
@@ -253,7 +229,7 @@ static u32 i40evf_get_priv_flags(struct net_device *netdev)
/**
* i40evf_set_priv_flags - set private flags
- * @dev: network interface device structure
+ * @netdev: network interface device structure
* @flags: bit flags to be set
**/
static int i40evf_set_priv_flags(struct net_device *netdev, u32 flags)
@@ -627,6 +603,7 @@ static int i40evf_set_per_queue_coalesce(struct net_device *netdev,
* i40evf_get_rxnfc - command to get RX flow classification rules
* @netdev: network interface device structure
* @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
*
* Returns Success if the command is supported.
**/
@@ -746,6 +723,7 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev)
* @netdev: network interface device structure
* @indir: indirection table
* @key: hash key
+ * @hfunc: hash function in use
*
* Reads the indirection table directly from the hardware. Always returns 0.
**/
@@ -774,6 +752,7 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
* @netdev: network interface device structure
* @indir: indirection table
* @key: hash key
+ * @hfunc: hash function to use
*
* Returns -EINVAL if the table specifies an inavlid queue id, otherwise
* returns 0 after programming the table.
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 5f71532be7f1..3f04a182903d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40evf.h"
#include "i40e_prototype.h"
@@ -473,6 +449,7 @@ static void i40evf_irq_affinity_release(struct kref *ref) {}
/**
* i40evf_request_traffic_irqs - Initialize MSI-X interrupts
* @adapter: board private structure
+ * @basename: device basename
*
* Allocates MSI-X vectors for tx and rx handling, and requests
* interrupts from the kernel.
@@ -705,7 +682,7 @@ i40evf_vlan_filter *i40evf_add_vlan(struct i40evf_adapter *adapter, u16 vlan)
f = i40evf_find_vlan(adapter, vlan);
if (!f) {
- f = kzalloc(sizeof(*f), GFP_ATOMIC);
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto clearout;
@@ -745,6 +722,7 @@ static void i40evf_del_vlan(struct i40evf_adapter *adapter, u16 vlan)
/**
* i40evf_vlan_rx_add_vid - Add a VLAN filter to a device
* @netdev: network device struct
+ * @proto: unused protocol data
* @vid: VLAN tag
**/
static int i40evf_vlan_rx_add_vid(struct net_device *netdev,
@@ -762,6 +740,7 @@ static int i40evf_vlan_rx_add_vid(struct net_device *netdev,
/**
* i40evf_vlan_rx_kill_vid - Remove a VLAN filter from a device
* @netdev: network device struct
+ * @proto: unused protocol data
* @vid: VLAN tag
**/
static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
@@ -3160,7 +3139,7 @@ static int i40evf_set_features(struct net_device *netdev,
/**
* i40evf_features_check - Validate encapsulated packet conforms to limits
* @skb: skb buff
- * @netdev: This physical port's netdev
+ * @dev: This physical port's netdev
* @features: Offload features that the stack believes apply
**/
static netdev_features_t i40evf_features_check(struct sk_buff *skb,
@@ -3378,6 +3357,24 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ /* Do not turn on offloads when they are requested to be turned off.
+ * TSO needs minimum 576 bytes to work correctly.
+ */
+ if (netdev->wanted_features) {
+ if (!(netdev->wanted_features & NETIF_F_TSO) ||
+ netdev->mtu < 576)
+ netdev->features &= ~NETIF_F_TSO;
+ if (!(netdev->wanted_features & NETIF_F_TSO6) ||
+ netdev->mtu < 576)
+ netdev->features &= ~NETIF_F_TSO6;
+ if (!(netdev->wanted_features & NETIF_F_TSO_ECN))
+ netdev->features &= ~NETIF_F_TSO_ECN;
+ if (!(netdev->wanted_features & NETIF_F_GRO))
+ netdev->features &= ~NETIF_F_GRO;
+ if (!(netdev->wanted_features & NETIF_F_GSO))
+ netdev->features &= ~NETIF_F_GSO;
+ }
+
adapter->vsi.id = adapter->vsi_res->vsi_id;
adapter->vsi.back = adapter;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 26a59890532f..565677de5ba3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include "i40evf.h"
#include "i40e_prototype.h"
@@ -179,8 +155,7 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
/**
* i40evf_get_vf_config
- * @hw: pointer to the hardware structure
- * @len: length of buffer
+ * @adapter: private adapter structure
*
* Get VF configuration from PF and populate hw structure. Must be called after
* admin queue is initialized. Busy waits until response is received from PF,
@@ -423,8 +398,6 @@ int i40evf_request_queues(struct i40evf_adapter *adapter, int num)
/**
* i40evf_add_ether_addrs
* @adapter: adapter structure
- * @addrs: the MAC address filters to add (contiguous)
- * @count: number of filters
*
* Request that the PF add one or more addresses to our filters.
**/
@@ -497,8 +470,6 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
/**
* i40evf_del_ether_addrs
* @adapter: adapter structure
- * @addrs: the MAC address filters to remove (contiguous)
- * @count: number of filtes
*
* Request that the PF remove one or more addresses from our filters.
**/
@@ -571,8 +542,6 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
/**
* i40evf_add_vlans
* @adapter: adapter structure
- * @vlans: the VLANs to add
- * @count: number of VLANs
*
* Request that the PF add one or more VLAN filters to our VSI.
**/
@@ -643,8 +612,6 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
/**
* i40evf_del_vlans
* @adapter: adapter structure
- * @vlans: the VLANs to remove
- * @count: number of VLANs
*
* Request that the PF remove one or more VLAN filters from our VSI.
**/
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index c48583e98ac1..394c1e0656b9 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -1,31 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel 82575 PCI-Express Ethernet Linux driver
-# Copyright(c) 1999 - 2014 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, see <http://www.gnu.org/licenses/>.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 1999 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) 82575 PCI-Express ethernet driver
#
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index dd9b6cac220d..b13b42e5a1d9 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
/* e1000_82575
* e1000_82576
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index e53ebe97d709..6ad775b1a4c5 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_82575_H_
#define _E1000_82575_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 98534f765e0e..252440a418dc 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_DEFINES_H_
#define _E1000_DEFINES_H_
@@ -491,6 +470,8 @@
* manageability enabled, allowing us room for 15 multicast addresses.
*/
#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */
+#define E1000_RAH_ASEL_SRC_ADDR 0x00010000
+#define E1000_RAH_QSEL_ENABLE 0x10000000
#define E1000_RAL_MAC_ADDR_LEN 4
#define E1000_RAH_MAC_ADDR_LEN 2
#define E1000_RAH_POOL_MASK 0x03FC0000
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
index ff835e1e853d..5d87957b2627 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -1,25 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_HW_H_
#define _E1000_HW_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index 6f548247e6d8..c54ebedca6da 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
/* e1000_i210
* e1000_i211
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
index 56f015ccb206..5c437fdc49ee 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_I210_H_
#define _E1000_I210_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 298afa0d9159..79ee0a747260 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#include <linux/if_ether.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h
index 04d80c765aee..6e110f28f922 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_MAC_H_
#define _E1000_MAC_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index ef42f1689b3b..46debd991bfe 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#include "e1000_mbx.h"
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h
index 4f0ecd28354d..178e60ec71d4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_MBX_H_
#define _E1000_MBX_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
index e4596f151cd4..09f4dcb09632 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
@@ -1,25 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#include <linux/if_ether.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h
index dde68cd54a53..091cddf4ada8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.h
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_NVM_H_
#define _E1000_NVM_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 4ec61243da82..2be0e762ec69 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#include <linux/if_ether.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 856d2cda0643..5894e4b1d0a8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_PHY_H_
#define _E1000_PHY_H_
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index e8fa8c6530e0..0ad737d2f289 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#ifndef _E1000_REGS_H_
#define _E1000_REGS_H_
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 8dbc399b345e..9643b5b3d444 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -1,26 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
/* Linux PRO/1000 Ethernet Driver main header file */
@@ -442,6 +421,8 @@ struct hwmon_buff {
enum igb_filter_match_flags {
IGB_FILTER_FLAG_ETHER_TYPE = 0x1,
IGB_FILTER_FLAG_VLAN_TCI = 0x2,
+ IGB_FILTER_FLAG_SRC_MAC_ADDR = 0x4,
+ IGB_FILTER_FLAG_DST_MAC_ADDR = 0x8,
};
#define IGB_MAX_RXNFC_FILTERS 16
@@ -456,11 +437,14 @@ struct igb_nfc_input {
u8 match_flags;
__be16 etype;
__be16 vlan_tci;
+ u8 src_addr[ETH_ALEN];
+ u8 dst_addr[ETH_ALEN];
};
struct igb_nfc_filter {
struct hlist_node nfc_node;
struct igb_nfc_input filter;
+ unsigned long cookie;
u16 etype_reg_index;
u16 sw_idx;
u16 action;
@@ -474,6 +458,8 @@ struct igb_mac_addr {
#define IGB_MAC_STATE_DEFAULT 0x1
#define IGB_MAC_STATE_IN_USE 0x2
+#define IGB_MAC_STATE_SRC_ADDR 0x4
+#define IGB_MAC_STATE_QUEUE_STEERING 0x8
/* board specific private data structure */
struct igb_adapter {
@@ -598,6 +584,7 @@ struct igb_adapter {
/* RX network flow classification support */
struct hlist_head nfc_filter_list;
+ struct hlist_head cls_flower_list;
unsigned int nfc_filter_count;
/* lock for RX network flow classification filter */
spinlock_t nfc_lock;
@@ -739,4 +726,9 @@ int igb_add_filter(struct igb_adapter *adapter,
int igb_erase_filter(struct igb_adapter *adapter,
struct igb_nfc_filter *input);
+int igb_add_mac_steering_filter(struct igb_adapter *adapter,
+ const u8 *addr, u8 queue, u8 flags);
+int igb_del_mac_steering_filter(struct igb_adapter *adapter,
+ const u8 *addr, u8 queue, u8 flags);
+
#endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index e77ba0d5866d..2d798499d35e 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
/* ethtool support for igb */
@@ -2495,6 +2474,23 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
}
+ if (rule->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+ ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+ rule->filter.dst_addr);
+ /* As we only support matching by the full
+ * mask, return the mask to userspace
+ */
+ eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+ }
+ if (rule->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+ ether_addr_copy(fsp->h_u.ether_spec.h_source,
+ rule->filter.src_addr);
+ /* As we only support matching by the full
+ * mask, return the mask to userspace
+ */
+ eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+ }
+
return 0;
}
return -EINVAL;
@@ -2768,14 +2764,41 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
{
+ struct e1000_hw *hw = &adapter->hw;
int err = -EINVAL;
+ if (hw->mac.type == e1000_i210 &&
+ !(input->filter.match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR)) {
+ dev_err(&adapter->pdev->dev,
+ "i210 doesn't support flow classification rules specifying only source addresses.\n");
+ return -EOPNOTSUPP;
+ }
+
if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
err = igb_rxnfc_write_etype_filter(adapter, input);
if (err)
return err;
}
+ if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+ err = igb_add_mac_steering_filter(adapter,
+ input->filter.dst_addr,
+ input->action, 0);
+ err = min_t(int, err, 0);
+ if (err)
+ return err;
+ }
+
+ if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+ err = igb_add_mac_steering_filter(adapter,
+ input->filter.src_addr,
+ input->action,
+ IGB_MAC_STATE_SRC_ADDR);
+ err = min_t(int, err, 0);
+ if (err)
+ return err;
+ }
+
if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
err = igb_rxnfc_write_vlan_prio_filter(adapter, input);
@@ -2824,6 +2847,15 @@ int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
igb_clear_vlan_prio_filter(adapter,
ntohs(input->filter.vlan_tci));
+ if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR)
+ igb_del_mac_steering_filter(adapter, input->filter.src_addr,
+ input->action,
+ IGB_MAC_STATE_SRC_ADDR);
+
+ if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR)
+ igb_del_mac_steering_filter(adapter, input->filter.dst_addr,
+ input->action, 0);
+
return 0;
}
@@ -2865,7 +2897,7 @@ static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter,
/* add filter to the list */
if (parent)
- hlist_add_behind(&parent->nfc_node, &input->nfc_node);
+ hlist_add_behind(&input->nfc_node, &parent->nfc_node);
else
hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
@@ -2905,10 +2937,6 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
return -EINVAL;
- if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK &&
- fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK))
- return -EINVAL;
-
input = kzalloc(sizeof(*input), GFP_KERNEL);
if (!input)
return -ENOMEM;
@@ -2918,6 +2946,20 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE;
}
+ /* Only support matching addresses by the full mask */
+ if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
+ input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR;
+ ether_addr_copy(input->filter.src_addr,
+ fsp->h_u.ether_spec.h_source);
+ }
+
+ /* Only support matching addresses by the full mask */
+ if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+ input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR;
+ ether_addr_copy(input->filter.dst_addr,
+ fsp->h_u.ether_spec.h_dest);
+ }
+
if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
err = -EINVAL;
diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
index bebe43b3a836..3b83747b2700 100644
--- a/drivers/net/ethernet/intel/igb/igb_hwmon.c
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#include "igb.h"
#include "e1000_82575.h"
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index cce7ada89255..78574c06635b 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1,26 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Intel(R) Gigabit Ethernet Linux driver
- * Copyright(c) 2007-2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -36,6 +15,7 @@
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
@@ -2513,6 +2493,250 @@ static int igb_offload_cbs(struct igb_adapter *adapter,
return 0;
}
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+#define VLAN_PRIO_FULL_MASK (0x07)
+
+static int igb_parse_cls_flower(struct igb_adapter *adapter,
+ struct tc_cls_flower_offload *f,
+ int traffic_class,
+ struct igb_nfc_filter *input)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key, *mask;
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+
+ if (!is_zero_ether_addr(mask->dst)) {
+ if (!is_broadcast_ether_addr(mask->dst)) {
+ NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
+ return -EINVAL;
+ }
+
+ input->filter.match_flags |=
+ IGB_FILTER_FLAG_DST_MAC_ADDR;
+ ether_addr_copy(input->filter.dst_addr, key->dst);
+ }
+
+ if (!is_zero_ether_addr(mask->src)) {
+ if (!is_broadcast_ether_addr(mask->src)) {
+ NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
+ return -EINVAL;
+ }
+
+ input->filter.match_flags |=
+ IGB_FILTER_FLAG_SRC_MAC_ADDR;
+ ether_addr_copy(input->filter.src_addr, key->src);
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key, *mask;
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+
+ if (mask->n_proto) {
+ if (mask->n_proto != ETHER_TYPE_FULL_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
+ return -EINVAL;
+ }
+
+ input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
+ input->filter.etype = key->n_proto;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key, *mask;
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+
+ if (mask->vlan_priority) {
+ if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
+ return -EINVAL;
+ }
+
+ input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+ input->filter.vlan_tci = key->vlan_priority;
+ }
+ }
+
+ input->action = traffic_class;
+ input->cookie = f->cookie;
+
+ return 0;
+}
+
+static int igb_configure_clsflower(struct igb_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ struct netlink_ext_ack *extack = cls_flower->common.extack;
+ struct igb_nfc_filter *filter, *f;
+ int err, tc;
+
+ tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+ if (tc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class");
+ return -EINVAL;
+ }
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return -ENOMEM;
+
+ err = igb_parse_cls_flower(adapter, cls_flower, tc, filter);
+ if (err < 0)
+ goto err_parse;
+
+ spin_lock(&adapter->nfc_lock);
+
+ hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) {
+ if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+ err = -EEXIST;
+ NL_SET_ERR_MSG_MOD(extack,
+ "This filter is already set in ethtool");
+ goto err_locked;
+ }
+ }
+
+ hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) {
+ if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+ err = -EEXIST;
+ NL_SET_ERR_MSG_MOD(extack,
+ "This filter is already set in cls_flower");
+ goto err_locked;
+ }
+ }
+
+ err = igb_add_filter(adapter, filter);
+ if (err < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter");
+ goto err_locked;
+ }
+
+ hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list);
+
+ spin_unlock(&adapter->nfc_lock);
+
+ return 0;
+
+err_locked:
+ spin_unlock(&adapter->nfc_lock);
+
+err_parse:
+ kfree(filter);
+
+ return err;
+}
+
+static int igb_delete_clsflower(struct igb_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ struct igb_nfc_filter *filter;
+ int err;
+
+ spin_lock(&adapter->nfc_lock);
+
+ hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node)
+ if (filter->cookie == cls_flower->cookie)
+ break;
+
+ if (!filter) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = igb_erase_filter(adapter, filter);
+ if (err < 0)
+ goto out;
+
+ hlist_del(&filter->nfc_node);
+ kfree(filter);
+
+out:
+ spin_unlock(&adapter->nfc_lock);
+
+ return err;
+}
+
+static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return igb_configure_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return igb_delete_clsflower(adapter, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct igb_adapter *adapter = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return igb_setup_tc_cls_flower(adapter, type_data);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int igb_setup_tc_block(struct igb_adapter *adapter,
+ struct tc_block_offload *f)
+{
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
+ adapter, adapter);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
+ adapter);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
@@ -2521,6 +2745,8 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
switch (type) {
case TC_SETUP_QDISC_CBS:
return igb_offload_cbs(adapter, type_data);
+ case TC_SETUP_BLOCK:
+ return igb_setup_tc_block(adapter, type_data);
default:
return -EOPNOTSUPP;
@@ -2822,6 +3048,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (hw->mac.type >= e1000_82576)
netdev->features |= NETIF_F_SCTP_CRC;
+ if (hw->mac.type >= e1000_i350)
+ netdev->features |= NETIF_F_HW_TC;
+
#define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
NETIF_F_GSO_GRE_CSUM | \
NETIF_F_GSO_IPXIP4 | \
@@ -6856,8 +7085,35 @@ static void igb_set_default_mac_filter(struct igb_adapter *adapter)
igb_rar_set_index(adapter, 0);
}
-static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
- const u8 queue)
+/* If the filter to be added and an already existing filter express
+ * the same address and address type, it should be possible to only
+ * override the other configurations, for example the queue to steer
+ * traffic.
+ */
+static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry,
+ const u8 *addr, const u8 flags)
+{
+ if (!(entry->state & IGB_MAC_STATE_IN_USE))
+ return true;
+
+ if ((entry->state & IGB_MAC_STATE_SRC_ADDR) !=
+ (flags & IGB_MAC_STATE_SRC_ADDR))
+ return false;
+
+ if (!ether_addr_equal(addr, entry->addr))
+ return false;
+
+ return true;
+}
+
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
+ * 'flags' is used to indicate what kind of match is made, match is by
+ * default for the destination address, if matching by source address
+ * is desired the flag IGB_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igb_add_mac_filter_flags(struct igb_adapter *adapter,
+ const u8 *addr, const u8 queue,
+ const u8 flags)
{
struct e1000_hw *hw = &adapter->hw;
int rar_entries = hw->mac.rar_entry_count -
@@ -6872,12 +7128,13 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
* addresses.
*/
for (i = 0; i < rar_entries; i++) {
- if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
+ if (!igb_mac_entry_can_be_used(&adapter->mac_table[i],
+ addr, flags))
continue;
ether_addr_copy(adapter->mac_table[i].addr, addr);
adapter->mac_table[i].queue = queue;
- adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE;
+ adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags;
igb_rar_set_index(adapter, i);
return i;
@@ -6886,9 +7143,22 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
return -ENOSPC;
}
-static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
+static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
const u8 queue)
{
+ return igb_add_mac_filter_flags(adapter, addr, queue, 0);
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue', 'flags' is used to indicate what kind of match need to be
+ * removed, match is by default for the destination address, if
+ * matching by source address is to be removed the flag
+ * IGB_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igb_del_mac_filter_flags(struct igb_adapter *adapter,
+ const u8 *addr, const u8 queue,
+ const u8 flags)
+{
struct e1000_hw *hw = &adapter->hw;
int rar_entries = hw->mac.rar_entry_count -
adapter->vfs_allocated_count;
@@ -6904,14 +7174,26 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
for (i = 0; i < rar_entries; i++) {
if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE))
continue;
+ if ((adapter->mac_table[i].state & flags) != flags)
+ continue;
if (adapter->mac_table[i].queue != queue)
continue;
if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
continue;
- adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
- adapter->mac_table[i].queue = 0;
+ /* When a filter for the default address is "deleted",
+ * we return it to its initial configuration
+ */
+ if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) {
+ adapter->mac_table[i].state =
+ IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE;
+ adapter->mac_table[i].queue =
+ adapter->vfs_allocated_count;
+ } else {
+ adapter->mac_table[i].state = 0;
+ adapter->mac_table[i].queue = 0;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ }
igb_rar_set_index(adapter, i);
return 0;
@@ -6920,6 +7202,34 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
return -ENOENT;
}
+static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
+ const u8 queue)
+{
+ return igb_del_mac_filter_flags(adapter, addr, queue, 0);
+}
+
+int igb_add_mac_steering_filter(struct igb_adapter *adapter,
+ const u8 *addr, u8 queue, u8 flags)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* In theory, this should be supported on 82575 as well, but
+ * that part wasn't easily accessible during development.
+ */
+ if (hw->mac.type != e1000_i210)
+ return -EOPNOTSUPP;
+
+ return igb_add_mac_filter_flags(adapter, addr, queue,
+ IGB_MAC_STATE_QUEUE_STEERING | flags);
+}
+
+int igb_del_mac_steering_filter(struct igb_adapter *adapter,
+ const u8 *addr, u8 queue, u8 flags)
+{
+ return igb_del_mac_filter_flags(adapter, addr, queue,
+ IGB_MAC_STATE_QUEUE_STEERING | flags);
+}
+
static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr)
{
struct igb_adapter *adapter = netdev_priv(netdev);
@@ -8763,12 +9073,24 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index)
if (is_valid_ether_addr(addr))
rar_high |= E1000_RAH_AV;
- if (hw->mac.type == e1000_82575)
+ if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR)
+ rar_high |= E1000_RAH_ASEL_SRC_ADDR;
+
+ switch (hw->mac.type) {
+ case e1000_82575:
+ case e1000_i210:
+ if (adapter->mac_table[index].state &
+ IGB_MAC_STATE_QUEUE_STEERING)
+ rar_high |= E1000_RAH_QSEL_ENABLE;
+
rar_high |= E1000_RAH_POOL_1 *
adapter->mac_table[index].queue;
- else
+ break;
+ default:
rar_high |= E1000_RAH_POOL_1 <<
adapter->mac_table[index].queue;
+ break;
+ }
}
wr32(E1000_RAL(index), rar_low);
@@ -9206,6 +9528,9 @@ static void igb_nfc_filter_exit(struct igb_adapter *adapter)
hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
igb_erase_filter(adapter, rule);
+ hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node)
+ igb_erase_filter(adapter, rule);
+
spin_unlock(&adapter->nfc_lock);
}
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 7454b9895a65..9f4d700e09df 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1,21 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+
-/* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580
- *
- * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, see <http://www.gnu.org/licenses/>.
- */
+/* Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> */
+
#include <linux/module.h>
#include <linux/device.h>
#include <linux/pci.h>
diff --git a/drivers/net/ethernet/intel/igbvf/Makefile b/drivers/net/ethernet/intel/igbvf/Makefile
index efe29dae384a..afd3e36eae75 100644
--- a/drivers/net/ethernet/intel/igbvf/Makefile
+++ b/drivers/net/ethernet/intel/igbvf/Makefile
@@ -1,31 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel(R) 82576 Virtual Function Linux driver
-# Copyright(c) 2009 - 2012 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 2009 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) 82576 VF ethernet driver
#
diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h
index 04bcfec0641b..4437f832412d 100644
--- a/drivers/net/ethernet/intel/igbvf/defines.h
+++ b/drivers/net/ethernet/intel/igbvf/defines.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000_DEFINES_H_
#define _E1000_DEFINES_H_
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index ca39e3cccaeb..3ae358b35227 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
/* ethtool support for igbvf */
diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h
index f5bf248e22eb..eee26a3be90b 100644
--- a/drivers/net/ethernet/intel/igbvf/igbvf.h
+++ b/drivers/net/ethernet/intel/igbvf/igbvf.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
/* Linux PRO/1000 Ethernet Driver main header file */
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
index 9195884096f8..163e5838f7c2 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.c
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
#include "mbx.h"
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.h b/drivers/net/ethernet/intel/igbvf/mbx.h
index 479b062fe9ee..e5b31818d565 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.h
+++ b/drivers/net/ethernet/intel/igbvf/mbx.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _E1000_MBX_H_
#define _E1000_MBX_H_
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index e2b7502f1953..f818f060e5a7 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/intel/igbvf/regs.h b/drivers/net/ethernet/intel/igbvf/regs.h
index 614e52409f11..625a309a3355 100644
--- a/drivers/net/ethernet/intel/igbvf/regs.h
+++ b/drivers/net/ethernet/intel/igbvf/regs.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
#ifndef _E1000_REGS_H_
#define _E1000_REGS_H_
diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c
index bfe8d8297b2e..b8ba3f94c363 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.c
+++ b/drivers/net/ethernet/intel/igbvf/vf.c
@@ -1,29 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
#include "vf.h"
diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h
index 193b50026246..c71b0d7dbcee 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.h
+++ b/drivers/net/ethernet/intel/igbvf/vf.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel(R) 82576 Virtual Function Linux driver
- Copyright(c) 2009 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
#ifndef _E1000_VF_H_
#define _E1000_VF_H_
diff --git a/drivers/net/ethernet/intel/ixgb/Makefile b/drivers/net/ethernet/intel/ixgb/Makefile
index 1b42dd554dd2..2433e9300a33 100644
--- a/drivers/net/ethernet/intel/ixgb/Makefile
+++ b/drivers/net/ethernet/intel/ixgb/Makefile
@@ -1,33 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel PRO/10GbE Linux driver
# Copyright(c) 1999 - 2008 Intel Corporation.
#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
-#
# Makefile for the Intel(R) PRO/10GbE ethernet driver
#
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb.h b/drivers/net/ethernet/intel/ixgb/ixgb.h
index 92022841755f..e85271b68410 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#ifndef _IXGB_H_
#define _IXGB_H_
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.c b/drivers/net/ethernet/intel/ixgb/ixgb_ee.c
index eca216b9b859..129286fc1634 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ee.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
index 475297a810fe..3ee0a09e5d0a 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#ifndef _IXGB_EE_H_
#define _IXGB_EE_H_
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index d10a0d242dda..43744bf0fc1c 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
/* ethtool support for ixgb */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
index bf9a220f71fb..cbaa933ef30d 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
/* ixgb_hw.c
* Shared functions for accessing and configuring the adapter
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
index 19f36d87ef61..6064583095da 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#ifndef _IXGB_HW_H_
#define _IXGB_HW_H_
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
index 24e849902d60..9695b8215f01 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#ifndef _IXGB_IDS_H_
#define _IXGB_IDS_H_
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 2353c383f0a7..62f2173bc20e 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
index b1710379192e..7bd54efa698d 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
/* glue for the OS independent part of ixgb
* includes register access macros
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_param.c b/drivers/net/ethernet/intel/ixgb/ixgb_param.c
index 04a60640ddda..f0cadd532c53 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_param.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_param.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel PRO/10GbE Linux driver
- Copyright(c) 1999 - 2008 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2008 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 4cd96c88cb5d..5414685189ce 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -1,32 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel 10 Gigabit PCI Express Linux driver
-# Copyright(c) 1999 - 2013 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 1999 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) 10GbE PCI Express ethernet driver
#
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 4f08c712e58e..fc534e91c6b2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_H_
#define _IXGBE_H_
@@ -241,8 +215,7 @@ struct ixgbe_tx_buffer {
unsigned long time_stamp;
union {
struct sk_buff *skb;
- /* XDP uses address ptr on irq_clean */
- void *data;
+ struct xdp_frame *xdpf;
};
unsigned int bytecount;
unsigned short gso_segs;
@@ -306,7 +279,6 @@ enum ixgbe_ring_state_t {
struct ixgbe_fwd_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
struct net_device *netdev;
- struct ixgbe_adapter *real_adapter;
unsigned int tx_base_queue;
unsigned int rx_base_queue;
int pool;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index cb0fe5fedb33..eee277c1bedf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/pci.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 66a74f4651e8..42f63b943ea0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/pci.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 633be93f3dbb..8d038837f72b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/pci.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 2b311382167a..4b531e8ae38a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_COMMON_H_
#define _IXGBE_COMMON_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
index aaea8282bfd2..d26cea5b43bd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
@@ -1,31 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include "ixgbe_type.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
index 73b6362d4327..60cd5863bf5e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _DCB_CONFIG_H_
#define _DCB_CONFIG_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
index 085130626330..379ae747cdce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
@@ -1,31 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include "ixgbe_type.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
index 7edce607f901..fdca41abb44c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _DCB_82598_CONFIG_H_
#define _DCB_82598_CONFIG_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
index 1eed6811e914..7948849840a5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include "ixgbe_type.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
index fa030f0abc18..c6f084883cab 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _DCB_82599_CONFIG_H_
#define _DCB_82599_CONFIG_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index b33f3f87e4b1..c00332d2e02a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include <linux/dcbnl.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
index ad54080488ee..55fe8114fe99 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
@@ -1,30 +1,6 @@
-/*******************************************************************************
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
#include <linux/debugfs.h>
#include <linux/module.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index c0e6ab42e0e1..bdd179c29ea4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* ethtool support for ixgbe */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 7a09a40e4472..b5219e024f70 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include <linux/if_ether.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index cf1919901514..724f5382329f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_FCOE_H
#define _IXGBE_FCOE_H
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 68af127987bc..41af2b81e960 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -1,29 +1,5 @@
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux driver
- * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */
#include "ixgbe.h"
#include <net/xfrm.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index 4f099f516645..9ef7faadda69 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -1,30 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */
#ifndef _IXGBE_IPSEC_H_
#define _IXGBE_IPSEC_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index ed4cbe94c355..893a9206e718 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include "ixgbe_sriov.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index afadba99f7b8..6652b201df5b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/types.h>
#include <linux/module.h>
@@ -1216,7 +1191,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
/* free the skb */
if (ring_is_xdp(tx_ring))
- page_frag_free(tx_buffer->data);
+ xdp_return_frame(tx_buffer->xdpf);
else
napi_consume_skb(tx_buffer->skb, napi_budget);
@@ -1768,15 +1743,14 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
- skb->protocol = eth_type_trans(skb, dev);
-
/* record Rx queue, or update MACVLAN statistics */
if (netif_is_ixgbe(dev))
skb_record_rx_queue(skb, rx_ring->queue_index);
else
macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
- (skb->pkt_type == PACKET_BROADCAST) ||
- (skb->pkt_type == PACKET_MULTICAST));
+ false);
+
+ skb->protocol = eth_type_trans(skb, dev);
}
static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
@@ -2262,7 +2236,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
#define IXGBE_XDP_TX 2
static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
- struct xdp_buff *xdp);
+ struct xdp_frame *xdpf);
static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
@@ -2270,6 +2244,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
{
int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
u32 act;
rcu_read_lock();
@@ -2278,12 +2253,19 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
if (!xdp_prog)
goto xdp_out;
+ prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
- result = ixgbe_xmit_xdp_ring(adapter, xdp);
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf)) {
+ result = IXGBE_XDP_CONSUMED;
+ break;
+ }
+ result = ixgbe_xmit_xdp_ring(adapter, xdpf);
break;
case XDP_REDIRECT:
err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
@@ -4211,7 +4193,8 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- u32 reg_offset, vf_shift;
+ u16 pool = adapter->num_rx_pools;
+ u32 reg_offset, vf_shift, vmolr;
u32 gcr_ext, vmdctl;
int i;
@@ -4225,6 +4208,13 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
vmdctl |= IXGBE_VT_CTL_REPLEN;
IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
+ /* accept untagged packets until a vlan tag is
+ * specifically set for the VMDQ queue/pool
+ */
+ vmolr = IXGBE_VMOLR_AUPE;
+ while (pool--)
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
+
vf_shift = VMDQ_P(0) % 32;
reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0;
@@ -4892,36 +4882,6 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
return -ENOMEM;
}
-/**
- * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
- * @netdev: network interface device structure
- * @vfn: pool to associate with unicast addresses
- *
- * Writes unicast address list to the RAR table.
- * Returns: -ENOMEM on failure/insufficient address space
- * 0 on no addresses written
- * X on writing X addresses to the RAR table
- **/
-static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- int count = 0;
-
- /* return ENOMEM indicating insufficient memory for addresses */
- if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn))
- return -ENOMEM;
-
- if (!netdev_uc_empty(netdev)) {
- struct netdev_hw_addr *ha;
- netdev_for_each_uc_addr(ha, netdev) {
- ixgbe_del_mac_filter(adapter, ha->addr, vfn);
- ixgbe_add_mac_filter(adapter, ha->addr, vfn);
- count++;
- }
- }
- return count;
-}
-
static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -5301,29 +5261,6 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
spin_unlock(&adapter->fdir_perfect_lock);
}
-static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool,
- struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 vmolr;
-
- /* No unicast promiscuous support for VMDQ devices. */
- vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool));
- vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE);
-
- /* clear the affected bit */
- vmolr &= ~IXGBE_VMOLR_MPE;
-
- if (dev->flags & IFF_ALLMULTI) {
- vmolr |= IXGBE_VMOLR_MPE;
- } else {
- vmolr |= IXGBE_VMOLR_ROMPE;
- hw->mac.ops.update_mc_addr_list(hw, dev);
- }
- ixgbe_write_uc_addr_list(adapter->netdev, pool);
- IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
-}
-
/**
* ixgbe_clean_rx_ring - Free Rx Buffers per Queue
* @rx_ring: ring to free buffers from
@@ -5376,21 +5313,17 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
rx_ring->next_to_use = 0;
}
-static int ixgbe_fwd_ring_up(struct net_device *vdev,
+static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
struct ixgbe_fwd_adapter *accel)
{
- struct ixgbe_adapter *adapter = accel->real_adapter;
+ struct net_device *vdev = accel->netdev;
int i, baseq, err;
- if (!test_bit(accel->pool, adapter->fwd_bitmask))
- return 0;
-
baseq = accel->pool * adapter->num_rx_queues_per_pool;
netdev_dbg(vdev, "pool %i:%i queues %i:%i\n",
accel->pool, adapter->num_rx_pools,
baseq, baseq + adapter->num_rx_queues_per_pool);
- accel->netdev = vdev;
accel->rx_base_queue = baseq;
accel->tx_base_queue = baseq;
@@ -5407,26 +5340,36 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev,
*/
err = ixgbe_add_mac_filter(adapter, vdev->dev_addr,
VMDQ_P(accel->pool));
- if (err >= 0) {
- ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter);
+ if (err >= 0)
return 0;
- }
+
+ /* if we cannot add the MAC rule then disable the offload */
+ macvlan_release_l2fw_offload(vdev);
for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
adapter->rx_ring[baseq + i]->netdev = NULL;
+ netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
+
+ clear_bit(accel->pool, adapter->fwd_bitmask);
+ kfree(accel);
+
return err;
}
-static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
+static int ixgbe_macvlan_up(struct net_device *vdev, void *data)
{
- if (netif_is_macvlan(upper)) {
- struct macvlan_dev *dfwd = netdev_priv(upper);
- struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+ struct ixgbe_adapter *adapter = data;
+ struct ixgbe_fwd_adapter *accel;
- if (dfwd->fwd_priv)
- ixgbe_fwd_ring_up(upper, vadapter);
- }
+ if (!netif_is_macvlan(vdev))
+ return 0;
+
+ accel = macvlan_accel_priv(vdev);
+ if (!accel)
+ return 0;
+
+ ixgbe_fwd_ring_up(adapter, accel);
return 0;
}
@@ -5434,7 +5377,7 @@ static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
{
netdev_walk_all_upper_dev_rcu(adapter->netdev,
- ixgbe_upper_dev_walk, NULL);
+ ixgbe_macvlan_up, adapter);
}
static void ixgbe_configure(struct ixgbe_adapter *adapter)
@@ -5797,7 +5740,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
/* Free all the Tx ring sk_buffs */
if (ring_is_xdp(tx_ring))
- page_frag_free(tx_buffer->data);
+ xdp_return_frame(tx_buffer->xdpf);
else
dev_kfree_skb_any(tx_buffer->skb);
@@ -6370,7 +6313,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
struct device *dev = rx_ring->dev;
int orig_node = dev_to_node(dev);
int ring_node = -1;
- int size;
+ int size, err;
size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
@@ -6407,6 +6350,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
rx_ring->queue_index) < 0)
goto err;
+ err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err) {
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ goto err;
+ }
+
rx_ring->xdp_prog = adapter->xdp_prog;
return 0;
@@ -8336,7 +8286,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
}
static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
- struct xdp_buff *xdp)
+ struct xdp_frame *xdpf)
{
struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
struct ixgbe_tx_buffer *tx_buffer;
@@ -8345,12 +8295,12 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
dma_addr_t dma;
u16 i;
- len = xdp->data_end - xdp->data;
+ len = xdpf->len;
if (unlikely(!ixgbe_desc_unused(ring)))
return IXGBE_XDP_CONSUMED;
- dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+ dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
if (dma_mapping_error(ring->dev, dma))
return IXGBE_XDP_CONSUMED;
@@ -8365,7 +8315,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
dma_unmap_len_set(tx_buffer, len, len);
dma_unmap_addr_set(tx_buffer, dma, dma);
- tx_buffer->data = xdp->data;
+ tx_buffer->xdpf = xdpf;
+
tx_desc->read.buffer_addr = cpu_to_le64(dma);
/* put descriptor type bits */
@@ -8827,6 +8778,49 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter)
}
#endif /* CONFIG_IXGBE_DCB */
+static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
+{
+ struct ixgbe_adapter *adapter = data;
+ struct ixgbe_fwd_adapter *accel;
+ int pool;
+
+ /* we only care about macvlans... */
+ if (!netif_is_macvlan(vdev))
+ return 0;
+
+ /* that have hardware offload enabled... */
+ accel = macvlan_accel_priv(vdev);
+ if (!accel)
+ return 0;
+
+ /* If we can relocate to a different bit do so */
+ pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+ if (pool < adapter->num_rx_pools) {
+ set_bit(pool, adapter->fwd_bitmask);
+ accel->pool = pool;
+ return 0;
+ }
+
+ /* if we cannot find a free pool then disable the offload */
+ netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
+ macvlan_release_l2fw_offload(vdev);
+ kfree(accel);
+
+ return 0;
+}
+
+static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+ /* flush any stale bits out of the fwd bitmask */
+ bitmap_clear(adapter->fwd_bitmask, 1, 63);
+
+ /* walk through upper devices reassigning pools */
+ netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool,
+ adapter);
+}
+
/**
* ixgbe_setup_tc - configure net_device for multiple traffic classes
*
@@ -8894,6 +8888,8 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
#endif /* CONFIG_IXGBE_DCB */
ixgbe_init_interrupt_scheme(adapter);
+ ixgbe_defrag_macvlan_pools(dev);
+
if (netif_running(dev))
return ixgbe_open(dev);
@@ -8998,13 +8994,12 @@ struct upper_walk_data {
static int get_macvlan_queue(struct net_device *upper, void *_data)
{
if (netif_is_macvlan(upper)) {
- struct macvlan_dev *dfwd = netdev_priv(upper);
- struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+ struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
struct upper_walk_data *data = _data;
struct ixgbe_adapter *adapter = data->adapter;
int ifindex = data->ifindex;
- if (vadapter && vadapter->netdev->ifindex == ifindex) {
+ if (vadapter && upper->ifindex == ifindex) {
data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
data->action = data->queue;
return 1;
@@ -9444,6 +9439,22 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
return features;
}
+static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter)
+{
+ int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+ num_online_cpus());
+
+ /* go back to full RSS if we're not running SR-IOV */
+ if (!adapter->ring_feature[RING_F_VMDQ].offset)
+ adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED |
+ IXGBE_FLAG_SRIOV_ENABLED);
+
+ adapter->ring_feature[RING_F_RSS].limit = rss;
+ adapter->ring_feature[RING_F_VMDQ].limit = 1;
+
+ ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs);
+}
+
static int ixgbe_set_features(struct net_device *netdev,
netdev_features_t features)
{
@@ -9524,7 +9535,9 @@ static int ixgbe_set_features(struct net_device *netdev,
}
}
- if (need_reset)
+ if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1)
+ ixgbe_reset_l2fw_offload(adapter);
+ else if (need_reset)
ixgbe_do_reset(netdev);
else if (changed & (NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER))
@@ -9787,71 +9800,98 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
{
- struct ixgbe_fwd_adapter *fwd_adapter = NULL;
struct ixgbe_adapter *adapter = netdev_priv(pdev);
- int used_pools = adapter->num_vfs + adapter->num_rx_pools;
+ struct ixgbe_fwd_adapter *accel;
int tcs = adapter->hw_tcs ? : 1;
- unsigned int limit;
int pool, err;
- /* Hardware has a limited number of available pools. Each VF, and the
- * PF require a pool. Check to ensure we don't attempt to use more
- * then the available number of pools.
+ /* The hardware supported by ixgbe only filters on the destination MAC
+ * address. In order to avoid issues we only support offloading modes
+ * where the hardware can actually provide the functionality.
*/
- if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
- return ERR_PTR(-EINVAL);
+ if (!macvlan_supports_dest_filter(vdev))
+ return ERR_PTR(-EMEDIUMTYPE);
- if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
- adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
- (adapter->num_rx_pools > IXGBE_MAX_MACVLANS))
- return ERR_PTR(-EBUSY);
+ pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+ if (pool == adapter->num_rx_pools) {
+ u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
+ u16 reserved_pools;
+
+ if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+ adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
+ adapter->num_rx_pools > IXGBE_MAX_MACVLANS)
+ return ERR_PTR(-EBUSY);
+
+ /* Hardware has a limited number of available pools. Each VF,
+ * and the PF require a pool. Check to ensure we don't
+ * attempt to use more then the available number of pools.
+ */
+ if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
+ return ERR_PTR(-EBUSY);
- fwd_adapter = kzalloc(sizeof(*fwd_adapter), GFP_KERNEL);
- if (!fwd_adapter)
- return ERR_PTR(-ENOMEM);
+ /* Enable VMDq flag so device will be set in VM mode */
+ adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED |
+ IXGBE_FLAG_SRIOV_ENABLED;
- pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
- set_bit(pool, adapter->fwd_bitmask);
- limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools + 1);
+ /* Try to reserve as many queues per pool as possible,
+ * we start with the configurations that support 4 queues
+ * per pools, followed by 2, and then by just 1 per pool.
+ */
+ if (used_pools < 32 && adapter->num_rx_pools < 16)
+ reserved_pools = min_t(u16,
+ 32 - used_pools,
+ 16 - adapter->num_rx_pools);
+ else if (adapter->num_rx_pools < 32)
+ reserved_pools = min_t(u16,
+ 64 - used_pools,
+ 32 - adapter->num_rx_pools);
+ else
+ reserved_pools = 64 - used_pools;
- /* Enable VMDq flag so device will be set in VM mode */
- adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED;
- adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
- fwd_adapter->pool = pool;
- fwd_adapter->real_adapter = adapter;
+ if (!reserved_pools)
+ return ERR_PTR(-EBUSY);
- /* Force reinit of ring allocation with VMDQ enabled */
- err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+ adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools;
- if (!err && netif_running(pdev))
- err = ixgbe_fwd_ring_up(vdev, fwd_adapter);
+ /* Force reinit of ring allocation with VMDQ enabled */
+ err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+ if (err)
+ return ERR_PTR(err);
- if (!err)
- return fwd_adapter;
+ if (pool >= adapter->num_rx_pools)
+ return ERR_PTR(-ENOMEM);
+ }
- /* unwind counter and free adapter struct */
- netdev_info(pdev,
- "%s: dfwd hardware acceleration failed\n", vdev->name);
- clear_bit(pool, adapter->fwd_bitmask);
- kfree(fwd_adapter);
- return ERR_PTR(err);
+ accel = kzalloc(sizeof(*accel), GFP_KERNEL);
+ if (!accel)
+ return ERR_PTR(-ENOMEM);
+
+ set_bit(pool, adapter->fwd_bitmask);
+ accel->pool = pool;
+ accel->netdev = vdev;
+
+ if (!netif_running(pdev))
+ return accel;
+
+ err = ixgbe_fwd_ring_up(adapter, accel);
+ if (err)
+ return ERR_PTR(err);
+
+ return accel;
}
static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
{
struct ixgbe_fwd_adapter *accel = priv;
- struct ixgbe_adapter *adapter = accel->real_adapter;
+ struct ixgbe_adapter *adapter = netdev_priv(pdev);
unsigned int rxbase = accel->rx_base_queue;
- unsigned int limit, i;
+ unsigned int i;
/* delete unicast filter associated with offloaded interface */
ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr,
VMDQ_P(accel->pool));
- /* disable ability to receive packets for this pool */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(accel->pool), 0);
-
/* Allow remaining Rx packets to get flushed out of the
* Rx FIFO before we drop the netdev for the ring.
*/
@@ -9870,25 +9910,6 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
}
clear_bit(accel->pool, adapter->fwd_bitmask);
- limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
- adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
-
- /* go back to full RSS if we're done with our VMQs */
- if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
- int rss = min_t(int, ixgbe_max_rss_indices(adapter),
- num_online_cpus());
-
- adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
- adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
- adapter->ring_feature[RING_F_RSS].limit = rss;
- }
-
- ixgbe_setup_tc(pdev, adapter->hw_tcs);
- netdev_dbg(pdev, "pool %i:%i queues %i:%i\n",
- accel->pool, adapter->num_rx_pools,
- accel->rx_base_queue,
- accel->rx_base_queue +
- adapter->num_rx_queues_per_pool);
kfree(accel);
}
@@ -9996,7 +10017,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
@@ -10012,7 +10033,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!ring))
return -ENXIO;
- err = ixgbe_xmit_xdp_ring(adapter, xdp);
+ err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX)
return -ENOSPC;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
index a0cb84381cd0..5679293e53f7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/pci.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index c4628b663590..e085b6520dac 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_MBX_H_
#define _IXGBE_MBX_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
index 72446644f9fa..fcae520647ce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux drive
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_MODEL_H_
#define _IXGBE_MODEL_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 91bde90f9265..919a7af84b42 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/pci.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index d6a7e77348c5..64e44e01c973 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_PHY_H_
#define _IXGBE_PHY_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index f6cc9166082a..b3e0d8bb5cbd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1,30 +1,6 @@
-/*******************************************************************************
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
#include "ixgbe.h"
#include <linux/ptp_classify.h>
#include <linux/clocksource.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 008aa073a679..2649c06d877b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/types.h>
#include <linux/module.h>
@@ -266,7 +241,7 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
#endif
/* Disable VMDq flag so device will be set in VM mode */
- if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+ if (bitmap_weight(adapter->fwd_bitmask, adapter->num_rx_pools) == 1) {
adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
rss = min_t(int, ixgbe_max_rss_indices(adapter),
@@ -312,7 +287,8 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
* other values out of range.
*/
num_tc = adapter->hw_tcs;
- num_rx_pools = adapter->num_rx_pools;
+ num_rx_pools = bitmap_weight(adapter->fwd_bitmask,
+ adapter->num_rx_pools);
limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC :
(num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index e30d1f07e891..3ec21923c89c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_SRIOV_H_
#define _IXGBE_SRIOV_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
index 24766e125592..204844288c16 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe.h"
#include "ixgbe_common.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 2daa81e6e9b2..e8ed37749ab1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1,31 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_TYPE_H_
#define _IXGBE_TYPE_H_
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index b8c5fd2a2115..de563cfd294d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -1,30 +1,5 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2016 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- Linux NICS <linux.nics@intel.com>
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include <linux/pci.h>
#include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
index 182d640e9f7a..e246c0d2a427 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
@@ -1,27 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux driver
- * Copyright(c) 1999 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- *****************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "ixgbe_type.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 3123267dfba9..959a37599a6b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1,26 +1,6 @@
-/*******************************************************************************
- *
- * Intel 10 Gigabit PCI Express Linux driver
- * Copyright(c) 1999 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * Linux NICS <linux.nics@intel.com>
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
#include "ixgbe_x540.h"
#include "ixgbe_type.h"
#include "ixgbe_common.h"
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index bb47814cfa90..aba1e6a37a6a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -1,31 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-################################################################################
-#
-# Intel 82599 Virtual Function driver
-# Copyright(c) 1999 - 2012 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
-
+# Copyright(c) 1999 - 2018 Intel Corporation.
#
# Makefile for the Intel(R) 82599 VF ethernet driver
#
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 71c828842b11..700d8eb2f6f8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBEVF_DEFINES_H_
#define _IXGBEVF_DEFINES_H_
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 8e7d6c6f5c92..e7813d76527c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2018 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/* ethtool support for ixgbevf */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 447ce1d5e0e3..70c75681495f 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2018 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBEVF_H_
#define _IXGBEVF_H_
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index e3d04f226d57..e91c3d1a43ce 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2018 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
/******************************************************************************
Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
index 2819abc454c7..6bc1953263b9 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.c
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "mbx.h"
#include "ixgbevf.h"
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index 5ec947fe3d09..bfd9ae150808 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBE_MBX_H_
#define _IXGBE_MBX_H_
diff --git a/drivers/net/ethernet/intel/ixgbevf/regs.h b/drivers/net/ethernet/intel/ixgbevf/regs.h
index 278f73980501..68d16ae5b65a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/regs.h
+++ b/drivers/net/ethernet/intel/ixgbevf/regs.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef _IXGBEVF_REGS_H_
#define _IXGBEVF_REGS_H_
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 38d3a327c1bc..bf0577e819e1 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -1,28 +1,5 @@
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#include "vf.h"
#include "ixgbevf.h"
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index 194fbdaa4519..d1e9e306653b 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -1,29 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*******************************************************************************
-
- Intel 82599 Virtual Function driver
- Copyright(c) 1999 - 2015 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, see <http://www.gnu.org/licenses/>.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
#ifndef __IXGBE_VF_H__
#define __IXGBE_VF_H__
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e0b72bf428f7..d8ebf0a05e0c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2503,7 +2503,6 @@ static int mtk_probe(struct platform_device *pdev)
{
struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
struct device_node *mac_np;
- const struct of_device_id *match;
struct mtk_eth *eth;
int err;
int i;
@@ -2512,8 +2511,7 @@ static int mtk_probe(struct platform_device *pdev)
if (!eth)
return -ENOMEM;
- match = of_match_device(of_mtk_match, &pdev->dev);
- eth->soc = (struct mtk_soc_data *)match->data;
+ eth->soc = of_device_get_match_data(&pdev->dev);
eth->dev = &pdev->dev;
eth->base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5c613c6663da..efc55feddc5c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ length = xdp.data_end - xdp.data;
if (xdp.data != orig_data) {
- length = xdp.data_end - xdp.data;
frags[0].page_offset = xdp.data -
xdp.data_hard_start;
va = xdp.data;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 6b6853773848..0227786308af 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -694,7 +694,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
u16 rings_p_up = priv->num_tx_rings_p_up;
if (netdev_get_num_tc(dev))
- return skb_tx_hash(dev, skb);
+ return fallback(dev, skb);
return fallback(dev, skb) % rings_p_up;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index c032319f1cb9..ee6684779d11 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -30,6 +30,7 @@ config MLX5_CORE_EN
bool "Mellanox Technologies ConnectX-4 Ethernet support"
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
depends on IPV6=y || IPV6=n || MLX5_CORE=m
+ select PAGE_POOL
default n
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
@@ -85,3 +86,14 @@ config MLX5_EN_IPSEC
Build support for IPsec cryptography-offload accelaration in the NIC.
Note: Support for hardware with this capability needs to be selected
for this option to become available.
+
+config MLX5_EN_TLS
+ bool "TLS cryptography-offload accelaration"
+ depends on MLX5_CORE_EN
+ depends on TLS_DEVICE
+ depends on MLX5_ACCEL
+ default n
+ ---help---
+ Build support for TLS cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index c805769d92a9..a7135f5d5cf6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -8,10 +8,10 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \
diag/fs_tracepoint.o
-mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
- fpga/ipsec.o
+ fpga/ipsec.o fpga/tls.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en_stats.o vxlan.o \
@@ -28,4 +28,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
en_accel/ipsec_stats.o
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+
CFLAGS_tracepoint.o := -I$(src)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
new file mode 100644
index 000000000000..77ac19f38cbe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/tls.h"
+#include "mlx5_core.h"
+#include "fpga/tls.h"
+
+int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid)
+{
+ return mlx5_fpga_tls_add_tx_flow(mdev, flow, crypto_info,
+ start_offload_tcp_sn, p_swid);
+}
+
+void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid)
+{
+ mlx5_fpga_tls_del_tx_flow(mdev, swid, GFP_KERNEL);
+}
+
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_is_tls_device(mdev);
+}
+
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_tls_device_caps(mdev);
+}
+
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_tls_init(mdev);
+}
+
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+ mlx5_fpga_tls_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
new file mode 100644
index 000000000000..6f9c9f446ecc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_TLS_H__
+#define __MLX5_ACCEL_TLS_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/tls.h>
+
+#ifdef CONFIG_MLX5_ACCEL
+
+enum {
+ MLX5_ACCEL_TLS_TX = BIT(0),
+ MLX5_ACCEL_TLS_RX = BIT(1),
+ MLX5_ACCEL_TLS_V12 = BIT(2),
+ MLX5_ACCEL_TLS_V13 = BIT(3),
+ MLX5_ACCEL_TLS_LRO = BIT(4),
+ MLX5_ACCEL_TLS_IPV6 = BIT(5),
+ MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
+ MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
+};
+
+struct mlx5_ifc_tls_flow_bits {
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+ u8 ipv6[0x1];
+ u8 direction_sx[0x1];
+ u8 reserved_at_2[0x1e];
+};
+
+int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid);
+void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid);
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline int
+mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid) { return 0; }
+static inline void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) { }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
+static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
+
+#endif
+
+#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 30cad07be2b5..9cc07da09b70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -53,6 +53,11 @@
#include "mlx5_core.h"
#include "en_stats.h"
+struct page_pool;
+
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
@@ -239,6 +244,7 @@ struct mlx5e_params {
bool vlan_strip_disable;
bool scatter_fcs_en;
bool rx_dim_enabled;
+ bool tx_dim_enabled;
u32 lro_timeout;
u32 pflags;
struct bpf_prog *xdp_prog;
@@ -328,6 +334,8 @@ enum {
MLX5E_SQ_STATE_ENABLED,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
+ MLX5E_SQ_STATE_AM,
+ MLX5E_SQ_STATE_TLS,
};
struct mlx5e_sq_wqe_info {
@@ -340,6 +348,7 @@ struct mlx5e_txqsq {
/* dirtied @completion */
u16 cc;
u32 dma_fifo_cc;
+ struct net_dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
@@ -392,6 +401,7 @@ struct mlx5e_xdpsq {
struct {
struct mlx5e_dma_info *di;
bool doorbell;
+ bool redirect_flush;
} db;
/* read only */
@@ -533,6 +543,7 @@ struct mlx5e_rq {
unsigned int hw_mtu;
struct mlx5e_xdpsq xdpsq;
DECLARE_BITMAP(flags, 8);
+ struct page_pool *page_pool;
/* control */
struct mlx5_wq_ctrl wq_ctrl;
@@ -790,6 +801,9 @@ struct mlx5e_priv {
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5e_ipsec *ipsec;
#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_tls *tls;
+#endif
};
struct mlx5e_profile {
@@ -820,6 +834,8 @@ void mlx5e_build_ptys2ethtool_map(void);
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi);
void mlx5e_completion_event(struct mlx5_core_cq *mcq);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
@@ -935,6 +951,18 @@ static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
}
+static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+ struct mlx5_wq_cyc *wq;
+
+ wq = &sq->wq;
+ *pi = sq->pc & wq->sz_m1;
+ *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
+
static inline
struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
{
@@ -1107,4 +1135,5 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
u16 max_channels, u16 mtu);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
+void mlx5e_tx_dim_work(struct work_struct *work);
#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
new file mode 100644
index 000000000000..68fcb40a2847
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_EN_ACCEL_H__
+#define __MLX5E_EN_ACCEL_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
+#include "en.h"
+
+static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb,
+ struct mlx5e_txqsq *sq,
+ struct net_device *dev,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+ if (sq->state & BIT(MLX5E_SQ_STATE_TLS)) {
+ skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi);
+ if (unlikely(!skb))
+ return NULL;
+ }
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
+ skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb);
+ if (unlikely(!skb))
+ return NULL;
+ }
+#endif
+
+ return skb;
+}
+
+#endif /* CONFIG_MLX5_ACCEL */
+
+#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 1198fc1eba4c..93bf10e6508c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -45,9 +45,6 @@
#define MLX5E_IPSEC_SADB_RX_BITS 10
#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
-#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
-#define MLX5E_METADATA_ETHER_LEN 8
-
struct mlx5e_priv;
struct mlx5e_ipsec_sw_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
new file mode 100644
index 000000000000..d167845271c3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include "en_accel/tls.h"
+#include "accel/tls.h"
+
+static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ MLX5_SET(tls_flow, flow, ipv6, 0);
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ MLX5_SET(tls_flow, flow, ipv6, 1);
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+}
+#endif
+
+static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
+ MLX5_FLD_SZ_BYTES(tls_flow, src_port));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
+ MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
+}
+
+static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
+{
+ switch (sk->sk_family) {
+ case AF_INET:
+ mlx5e_tls_set_ipv4_flow(flow, sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (!sk->sk_ipv6only &&
+ ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+ mlx5e_tls_set_ipv4_flow(flow, sk);
+ break;
+ }
+ if (!(caps & MLX5_ACCEL_TLS_IPV6))
+ goto error_out;
+
+ mlx5e_tls_set_ipv6_flow(flow, sk);
+ break;
+#endif
+ default:
+ goto error_out;
+ }
+
+ mlx5e_tls_set_flow_tcp_ports(flow, sk);
+ return 0;
+error_out:
+ return -EINVAL;
+}
+
+static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 caps = mlx5_accel_tls_device_caps(mdev);
+ int ret = -ENOMEM;
+ void *flow;
+
+ if (direction != TLS_OFFLOAD_CTX_DIR_TX)
+ return -EINVAL;
+
+ flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
+ if (!flow)
+ return ret;
+
+ ret = mlx5e_tls_set_flow(flow, sk, caps);
+ if (ret)
+ goto free_flow;
+
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+ struct mlx5e_tls_offload_context *tx_ctx =
+ mlx5e_get_tls_tx_context(tls_ctx);
+ u32 swid;
+
+ ret = mlx5_accel_tls_add_tx_flow(mdev, flow, crypto_info,
+ start_offload_tcp_sn, &swid);
+ if (ret < 0)
+ goto free_flow;
+
+ tx_ctx->swid = htonl(swid);
+ tx_ctx->expected_seq = start_offload_tcp_sn;
+ }
+
+ return 0;
+free_flow:
+ kfree(flow);
+ return ret;
+}
+
+static void mlx5e_tls_del(struct net_device *netdev,
+ struct tls_context *tls_ctx,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+ u32 swid = ntohl(mlx5e_get_tls_tx_context(tls_ctx)->swid);
+
+ mlx5_accel_tls_del_tx_flow(priv->mdev, swid);
+ } else {
+ netdev_err(netdev, "unsupported direction %d\n", direction);
+ }
+}
+
+static const struct tlsdev_ops mlx5e_tls_ops = {
+ .tls_dev_add = mlx5e_tls_add,
+ .tls_dev_del = mlx5e_tls_del,
+};
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5_accel_is_tls_device(priv->mdev))
+ return;
+
+ netdev->features |= NETIF_F_HW_TLS_TX;
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ netdev->tlsdev_ops = &mlx5e_tls_ops;
+}
+
+int mlx5e_tls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls = priv->tls;
+
+ if (!tls)
+ return;
+
+ kfree(tls);
+ priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
new file mode 100644
index 000000000000..b6162178f621
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef __MLX5E_TLS_H__
+#define __MLX5E_TLS_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <net/tls.h>
+#include "en.h"
+
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_drop_metadata;
+ atomic64_t tx_tls_drop_resync_alloc;
+ atomic64_t tx_tls_drop_no_sync_data;
+ atomic64_t tx_tls_drop_bypass_required;
+};
+
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+};
+
+struct mlx5e_tls_offload_context {
+ struct tls_offload_context base;
+ u32 expected_seq;
+ __be32 swid;
+};
+
+static inline struct mlx5e_tls_offload_context *
+mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context) >
+ TLS_OFFLOAD_CONTEXT_SIZE);
+ return container_of(tls_offload_ctx(tls_ctx),
+ struct mlx5e_tls_offload_context,
+ base);
+}
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_tls_init(struct mlx5e_priv *priv);
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv);
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
+static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
+
+#endif
+
+#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
new file mode 100644
index 000000000000..ad2790fb5966
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "en_accel/tls.h"
+#include "en_accel/tls_rxtx.h"
+
+#define SYNDROME_OFFLOAD_REQUIRED 32
+#define SYNDROME_SYNC 33
+
+struct sync_info {
+ u64 rcd_sn;
+ s32 sync_len;
+ int nr_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct mlx5e_tls_metadata {
+ /* One byte of syndrome followed by 3 bytes of swid */
+ __be32 syndrome_swid;
+ __be16 first_seq;
+ /* packet type ID field */
+ __be16 ethertype;
+} __packed;
+
+static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
+{
+ struct mlx5e_tls_metadata *pet;
+ struct ethhdr *eth;
+
+ if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
+ return -ENOMEM;
+
+ eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
+ skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
+ pet = (struct mlx5e_tls_metadata *)(eth + 1);
+
+ memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
+ 2 * ETH_ALEN);
+
+ eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+ pet->syndrome_swid = htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
+
+ return 0;
+}
+
+static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context *context,
+ u32 tcp_seq, struct sync_info *info)
+{
+ int remaining, i = 0, ret = -EINVAL;
+ struct tls_record_info *record;
+ unsigned long flags;
+ s32 sync_size;
+
+ spin_lock_irqsave(&context->base.lock, flags);
+ record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
+
+ if (unlikely(!record))
+ goto out;
+
+ sync_size = tcp_seq - tls_record_start_seq(record);
+ info->sync_len = sync_size;
+ if (unlikely(sync_size < 0)) {
+ if (tls_record_is_start_marker(record))
+ goto done;
+
+ goto out;
+ }
+
+ remaining = sync_size;
+ while (remaining > 0) {
+ info->frags[i] = record->frags[i];
+ __skb_frag_ref(&info->frags[i]);
+ remaining -= skb_frag_size(&info->frags[i]);
+
+ if (remaining < 0)
+ skb_frag_size_add(&info->frags[i], remaining);
+
+ i++;
+ }
+ info->nr_frags = i;
+done:
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&context->base.lock, flags);
+ return ret;
+}
+
+static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
+ struct sk_buff *nskb, u32 tcp_seq,
+ int headln, __be64 rcd_sn)
+{
+ struct mlx5e_tls_metadata *pet;
+ u8 syndrome = SYNDROME_SYNC;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ int data_len, mss;
+
+ nskb->dev = skb->dev;
+ skb_reset_mac_header(nskb);
+ skb_set_network_header(nskb, skb_network_offset(skb));
+ skb_set_transport_header(nskb, skb_transport_offset(skb));
+ memcpy(nskb->data, skb->data, headln);
+ memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
+
+ iph = ip_hdr(nskb);
+ iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
+ th = tcp_hdr(nskb);
+ data_len = nskb->len - headln;
+ tcp_seq -= data_len;
+ th->seq = htonl(tcp_seq);
+
+ mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
+ skb_shinfo(nskb)->gso_size = 0;
+ if (data_len > mss) {
+ skb_shinfo(nskb)->gso_size = mss;
+ skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
+ }
+ skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+
+ pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
+ memcpy(pet, &syndrome, sizeof(syndrome));
+ pet->first_seq = htons(tcp_seq);
+
+ /* MLX5 devices don't care about the checksum partial start, offset
+ * and pseudo header
+ */
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+
+ nskb->xmit_more = 1;
+ nskb->queue_mapping = skb->queue_mapping;
+}
+
+static struct sk_buff *
+mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context *context,
+ struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi,
+ struct mlx5e_tls *tls)
+{
+ u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ struct sync_info info;
+ struct sk_buff *nskb;
+ int linear_len = 0;
+ int headln;
+ int i;
+
+ sq->stats.tls_ooo++;
+
+ if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
+ /* We might get here if a retransmission reaches the driver
+ * after the relevant record is acked.
+ * It should be safe to drop the packet in this case
+ */
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
+ goto err_out;
+ }
+
+ if (unlikely(info.sync_len < 0)) {
+ u32 payload;
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ payload = skb->len - headln;
+ if (likely(payload <= -info.sync_len))
+ /* SKB payload doesn't require offload
+ */
+ return skb;
+
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
+ goto err_out;
+ }
+
+ if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
+ goto err_out;
+ }
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ linear_len += headln + sizeof(info.rcd_sn);
+ nskb = alloc_skb(linear_len, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
+ goto err_out;
+ }
+
+ context->expected_seq = tcp_seq + skb->len - headln;
+ skb_put(nskb, linear_len);
+ for (i = 0; i < info.nr_frags; i++)
+ skb_shinfo(nskb)->frags[i] = info.frags[i];
+
+ skb_shinfo(nskb)->nr_frags = info.nr_frags;
+ nskb->data_len = info.sync_len;
+ nskb->len += info.sync_len;
+ sq->stats.tls_resync_bytes += nskb->len;
+ mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
+ cpu_to_be64(info.rcd_sn));
+ mlx5e_sq_xmit(sq, nskb, *wqe, *pi);
+ mlx5e_sq_fetch_wqe(sq, wqe, pi);
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_tls_offload_context *context;
+ struct tls_context *tls_ctx;
+ u32 expected_seq;
+ int datalen;
+ u32 skb_seq;
+
+ if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+ goto out;
+
+ datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (!datalen)
+ goto out;
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ if (unlikely(tls_ctx->netdev != netdev))
+ goto out;
+
+ skb_seq = ntohl(tcp_hdr(skb)->seq);
+ context = mlx5e_get_tls_tx_context(tls_ctx);
+ expected_seq = context->expected_seq;
+
+ if (unlikely(expected_seq != skb_seq)) {
+ skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
+ goto out;
+ }
+
+ if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+ atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ goto out;
+ }
+
+ context->expected_seq = skb_seq + datalen;
+out:
+ return skb;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
new file mode 100644
index 000000000000..405dfd302225
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_TLS_RXTX_H__
+#define __MLX5E_TLS_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi);
+
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
new file mode 100644
index 000000000000..01468ec27446
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/tls.h"
+#include "fpga/sdk.h"
+#include "en_accel/tls.h"
+
+static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+{
+ if (!priv->tls)
+ return 0;
+
+ return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ unsigned int i, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_tls_sw_stats_desc[i].format);
+
+ return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ int i, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_tls_sw_stats_desc, i);
+
+ return NUM_TLS_SW_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
index 602851ab5b14..d67adf70a97b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -33,16 +33,30 @@
#include <linux/net_dim.h>
#include "en.h"
+static void
+mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+ struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
+{
+ mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
+ dim->state = NET_DIM_START_MEASURE;
+}
+
void mlx5e_rx_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim,
- work);
+ struct net_dim *dim = container_of(work, struct net_dim, work);
struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
- struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode,
- dim->profile_ix);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
- mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq,
- cur_profile.usec, cur_profile.pkts);
+ mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
+}
- dim->state = NET_DIM_START_MEASURE;
+void mlx5e_tx_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 37fd0245b6c1..2b786c4d3dab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -389,14 +389,20 @@ static int mlx5e_set_channels(struct net_device *dev,
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
+ struct net_dim_cq_moder *rx_moder, *tx_moder;
+
if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
return -EOPNOTSUPP;
- coal->rx_coalesce_usecs = priv->channels.params.rx_cq_moderation.usec;
- coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts;
- coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec;
- coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts;
- coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+ rx_moder = &priv->channels.params.rx_cq_moderation;
+ coal->rx_coalesce_usecs = rx_moder->usec;
+ coal->rx_max_coalesced_frames = rx_moder->pkts;
+ coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+
+ tx_moder = &priv->channels.params.tx_cq_moderation;
+ coal->tx_coalesce_usecs = tx_moder->usec;
+ coal->tx_max_coalesced_frames = tx_moder->pkts;
+ coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled;
return 0;
}
@@ -438,6 +444,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
+ struct net_dim_cq_moder *rx_moder, *tx_moder;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
int err = 0;
@@ -463,11 +470,15 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
new_channels.params = priv->channels.params;
- new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs;
- new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames;
- new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs;
- new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames;
- new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+ rx_moder = &new_channels.params.rx_cq_moderation;
+ rx_moder->usec = coal->rx_coalesce_usecs;
+ rx_moder->pkts = coal->rx_max_coalesced_frames;
+ new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+
+ tx_moder = &new_channels.params.tx_cq_moderation;
+ tx_moder->usec = coal->tx_coalesce_usecs;
+ tx_moder->pkts = coal->tx_max_coalesced_frames;
+ new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
@@ -475,7 +486,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
}
/* we are opened */
- reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
+ reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) ||
+ (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled);
+
if (!reset) {
mlx5e_set_priv_channels_coalesce(priv, coal);
priv->channels.params = new_channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b29c1d93f058..553eb63753ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -35,13 +35,16 @@
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include <linux/bpf.h>
+#include <net/page_pool.h>
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls.h"
#include "accel/ipsec.h"
+#include "accel/tls.h"
#include "vxlan.h"
struct mlx5e_rq_param {
@@ -389,10 +392,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5e_rq_param *rqp,
struct mlx5e_rq *rq)
{
+ struct page_pool_params pp_params = { 0 };
struct mlx5_core_dev *mdev = c->mdev;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
- u32 byte_count;
+ u32 byte_count, pool_size;
int npages;
int wq_sz;
int err;
@@ -432,9 +436,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+ pool_size = 1 << params->log_rq_mtu_frames;
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+
+ pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
@@ -512,6 +519,32 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->mkey_be = c->mkey_be;
}
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = rq->buff.page_order;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = cpu_to_node(c->cpu);
+ pp_params.dev = c->pdev;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* page_pool can be used even when there is no rq->xdp_prog,
+ * given page_pool does not handle DMA mapping there is no
+ * required state to clear. And page_pool gracefully handle
+ * elevated refcnt.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ kfree(rq->wqe.frag_info);
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_rq_wq_destroy;
+ }
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
+ if (err)
+ goto err_rq_wq_destroy;
+
for (i = 0; i < wq_sz; i++) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
@@ -548,6 +581,8 @@ err_rq_wq_destroy:
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (rq->page_pool)
+ page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
@@ -561,6 +596,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (rq->page_pool)
+ page_pool_destroy(rq->page_pool);
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -979,6 +1016,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+ if (mlx5_accel_is_tls_device(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -990,6 +1029,9 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
if (err)
goto err_sq_wq_destroy;
+ INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
+ sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
+
sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
return 0;
@@ -1153,6 +1195,9 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
if (tx_rate)
mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
+ if (params->tx_dim_enabled)
+ sq->state |= BIT(MLX5E_SQ_STATE_AM);
+
return 0;
err_free_txqsq:
@@ -4049,18 +4094,48 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
{
- params->tx_cq_moderation.cq_period_mode = cq_period_mode;
+ struct net_dim_cq_moder moder;
- params->tx_cq_moderation.pkts =
- MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
- params->tx_cq_moderation.usec =
- MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+ struct net_dim_cq_moder moder;
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
- params->tx_cq_moderation.usec =
- MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+ return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->tx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+ } else {
+ params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+ }
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
params->tx_cq_moderation.cq_period_mode ==
@@ -4069,28 +4144,12 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
{
- params->rx_cq_moderation.cq_period_mode = cq_period_mode;
-
- params->rx_cq_moderation.pkts =
- MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
- params->rx_cq_moderation.usec =
- MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
-
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
- params->rx_cq_moderation.usec =
- MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
-
if (params->rx_dim_enabled) {
- switch (cq_period_mode) {
- case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
- params->rx_cq_moderation =
- net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE);
- break;
- case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
- default:
- params->rx_cq_moderation =
- net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE);
- }
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+ } else {
+ params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
}
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
@@ -4154,6 +4213,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
@@ -4320,6 +4380,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
#endif
mlx5e_ipsec_build_netdev(priv);
+ mlx5e_tls_build_netdev(priv);
}
static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
@@ -4361,12 +4422,16 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
err = mlx5e_ipsec_init(priv);
if (err)
mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
+ err = mlx5e_tls_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
mlx5e_build_nic_netdev(netdev);
mlx5e_vxlan_init(priv);
}
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
+ mlx5e_tls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
mlx5e_vxlan_cleanup(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 176645762e49..7bbf0db27a01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -37,6 +37,7 @@
#include <linux/bpf_trace.h>
#include <net/busy_poll.h>
#include <net/ip6_checksum.h>
+#include <net/page_pool.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
@@ -221,7 +222,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
if (mlx5e_rx_cache_get(rq, dma_info))
return 0;
- dma_info->page = dev_alloc_pages(rq->buff.page_order);
+ dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
if (unlikely(!dma_info->page))
return -ENOMEM;
@@ -236,15 +237,26 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
return 0;
}
+static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
+ rq->buff.map_dir);
+}
+
void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
bool recycle)
{
- if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
- return;
+ if (likely(recycle)) {
+ if (mlx5e_rx_cache_put(rq, dma_info))
+ return;
- dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
- rq->buff.map_dir);
- put_page(dma_info->page);
+ mlx5e_page_dma_unmap(rq, dma_info);
+ page_pool_recycle_direct(rq->page_pool, dma_info->page);
+ } else {
+ mlx5e_page_dma_unmap(rq, dma_info);
+ put_page(dma_info->page);
+ }
}
static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
@@ -800,9 +812,10 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len)
{
- const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+ struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
struct xdp_buff xdp;
u32 act;
+ int err;
if (!prog)
return false;
@@ -823,6 +836,15 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp)))
trace_xdp_exception(rq->netdev, prog, act);
return true;
+ case XDP_REDIRECT:
+ /* When XDP enabled then page-refcnt==1 here */
+ err = xdp_do_redirect(rq->netdev, &xdp, prog);
+ if (!err) {
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+ rq->xdpsq.db.redirect_flush = true;
+ mlx5e_page_dma_unmap(rq, di);
+ }
+ return true;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
@@ -868,6 +890,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
+ prefetchw(va); /* xdp_frame data area */
prefetch(data);
wi->offset += frag_size;
@@ -1140,6 +1163,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
xdpsq->db.doorbell = false;
}
+ if (xdpsq->db.redirect_flush) {
+ xdp_do_flush_map();
+ xdpsq->db.redirect_flush = false;
+ }
+
mlx5_cqwq_update_db_record(&cq->wq);
/* ensure cq space is freed before enabling more cqes */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index b08c94422907..e17919c0af08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -32,6 +32,7 @@
#include "en.h"
#include "en_accel/ipsec.h"
+#include "en_accel/tls.h"
static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
@@ -43,6 +44,12 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+#endif
+
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
@@ -161,6 +168,10 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
s->tx_csum_none += sq_stats->csum_none;
s->tx_csum_partial += sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+#endif
}
}
@@ -1065,6 +1076,22 @@ static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv)
mlx5e_ipsec_update_stats(priv);
}
+static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_tls_get_count(priv);
+}
+
+static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ return idx + mlx5e_tls_get_stats(priv, data + idx);
+}
+
static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
@@ -1268,6 +1295,11 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
.update_stats = mlx5e_grp_ipsec_update_stats,
},
{
+ .get_num_stats = mlx5e_grp_tls_get_num_stats,
+ .fill_strings = mlx5e_grp_tls_fill_strings,
+ .fill_stats = mlx5e_grp_tls_fill_stats,
+ },
+ {
.get_num_stats = mlx5e_grp_channels_get_num_stats,
.fill_strings = mlx5e_grp_channels_fill_strings,
.fill_stats = mlx5e_grp_channels_fill_stats,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 53111a2df587..a36e6a87066b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -93,6 +93,11 @@ struct mlx5e_sw_stats {
u64 rx_cache_waive;
u64 ch_eq_rearm;
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tx_tls_ooo;
+ u64 tx_tls_resync_bytes;
+#endif
+
/* Special handling counters */
u64 link_down_events_phy;
};
@@ -194,6 +199,10 @@ struct mlx5e_sq_stats {
u64 csum_partial_inner;
u64 added_vlan_packets;
u64 nop;
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_ooo;
+ u64 tls_resync_bytes;
+#endif
/* less likely accessed in data path */
u64 csum_none;
u64 stopped;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 5532aa3675c7..047614d2eda2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -35,12 +35,21 @@
#include <net/dsfield.h>
#include "en.h"
#include "ipoib/ipoib.h"
-#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/en_accel.h"
#include "lib/clock.h"
#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+
+#ifndef CONFIG_MLX5_EN_TLS
#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
MLX5E_SQ_NOPS_ROOM)
+#else
+/* TLS offload requires MLX5E_SQ_STOP_ROOM to have
+ * enough room for a resync SKB, a normal SKB and a NOP
+ */
+#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+#endif
static inline void mlx5e_tx_dma_unmap(struct device *pdev,
struct mlx5e_sq_dma *dma)
@@ -329,8 +338,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
}
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi)
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi)
{
struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
@@ -401,21 +410,19 @@ err_drop:
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = sq->pc & wq->sz_m1;
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5e_tx_wqe *wqe;
+ struct mlx5e_txqsq *sq;
+ u16 pi;
- memset(wqe, 0, sizeof(*wqe));
+ sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+ mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
-#ifdef CONFIG_MLX5_EN_IPSEC
- if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
- skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
- if (unlikely(!skb))
- return NETDEV_TX_OK;
- }
+#ifdef CONFIG_MLX5_ACCEL
+ /* might send skbs and update wqe and pi */
+ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
+ if (unlikely(!skb))
+ return NETDEV_TX_OK;
#endif
-
return mlx5e_sq_xmit(sq, skb, wqe, pi);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index f292bb346985..900661d45c8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -44,6 +44,30 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
return cpumask_test_cpu(current_cpu, aff);
}
+static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+{
+ struct net_dim_sample dim_sample;
+
+ if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_AM)))
+ return;
+
+ net_dim_sample(sq->cq.event_ctr, sq->stats.packets, sq->stats.bytes,
+ &dim_sample);
+ net_dim(&sq->dim, dim_sample);
+}
+
+static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
+{
+ struct net_dim_sample dim_sample;
+
+ if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_AM)))
+ return;
+
+ net_dim_sample(rq->cq.event_ctr, rq->stats.packets, rq->stats.bytes,
+ &dim_sample);
+ net_dim(&rq->dim, dim_sample);
+}
+
int mlx5e_napi_poll(struct napi_struct *napi, int budget)
{
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -75,18 +99,13 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
if (unlikely(!napi_complete_done(napi, work_done)))
return work_done;
- for (i = 0; i < c->num_tc; i++)
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_handle_tx_dim(&c->sq[i]);
mlx5e_cq_arm(&c->sq[i].cq);
-
- if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) {
- struct net_dim_sample dim_sample;
- net_dim_sample(c->rq.cq.event_ctr,
- c->rq.stats.packets,
- c->rq.stats.bytes,
- &dim_sample);
- net_dim(&c->rq.dim, dim_sample);
}
+ mlx5e_handle_rx_dim(&c->rq);
+
mlx5e_cq_arm(&c->rq.cq);
mlx5e_cq_arm(&c->icosq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 82405ed84725..3e2355c8df3f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -53,6 +53,7 @@ struct mlx5_fpga_device {
} conn_res;
struct mlx5_fpga_ipsec *ipsec;
+ struct mlx5_fpga_tls *tls;
};
#define mlx5_fpga_dbg(__adev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 0f5da499a223..3c4f1f326e13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -43,9 +43,6 @@
#include "fpga/sdk.h"
#include "fpga/core.h"
-#define SBU_QP_QUEUE_SIZE 8
-#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC (60 * 1000)
-
enum mlx5_fpga_ipsec_cmd_status {
MLX5_FPGA_IPSEC_CMD_PENDING,
MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
@@ -258,7 +255,7 @@ static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
{
struct mlx5_fpga_ipsec_cmd_context *context = ctx;
unsigned long timeout =
- msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
+ msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
int res;
res = wait_for_completion_timeout(&context->complete, timeout);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
index baa537e54a49..a0573cc2fc9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -41,6 +41,8 @@
* DOC: Innova SDK
* This header defines the in-kernel API for Innova FPGA client drivers.
*/
+#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000)
enum mlx5_fpga_access_type {
MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
new file mode 100644
index 000000000000..21048013826c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+#include "fpga/tls.h"
+#include "fpga/cmd.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+#include "accel/tls.h"
+
+struct mlx5_fpga_tls_command_context;
+
+typedef void (*mlx5_fpga_tls_command_complete)
+ (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *ctx,
+ struct mlx5_fpga_dma_buf *resp);
+
+struct mlx5_fpga_tls_command_context {
+ struct list_head list;
+ /* There is no guarantee on the order between the TX completion
+ * and the command response.
+ * The TX completion is going to touch cmd->buf even in
+ * the case of successful transmission.
+ * So instead of requiring separate allocations for cmd
+ * and cmd->buf we've decided to use a reference counter
+ */
+ refcount_t ref;
+ struct mlx5_fpga_dma_buf buf;
+ mlx5_fpga_tls_command_complete complete;
+};
+
+static void
+mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
+{
+ if (refcount_dec_and_test(&ctx->ref))
+ kfree(ctx);
+}
+
+static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_fpga_conn *conn = fdev->tls->conn;
+ struct mlx5_fpga_tls_command_context *ctx;
+ struct mlx5_fpga_tls *tls = fdev->tls;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+ ctx = list_first_entry(&tls->pending_cmds,
+ struct mlx5_fpga_tls_command_context, list);
+ list_del(&ctx->list);
+ spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+ ctx->complete(conn, fdev, ctx, resp);
+}
+
+static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf,
+ u8 status)
+{
+ struct mlx5_fpga_tls_command_context *ctx =
+ container_of(buf, struct mlx5_fpga_tls_command_context, buf);
+
+ mlx5_fpga_tls_put_command_ctx(ctx);
+
+ if (unlikely(status))
+ mlx5_fpga_tls_cmd_complete(fdev, NULL);
+}
+
+static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ mlx5_fpga_tls_command_complete complete)
+{
+ struct mlx5_fpga_tls *tls = fdev->tls;
+ unsigned long flags;
+ int ret;
+
+ refcount_set(&cmd->ref, 2);
+ cmd->complete = complete;
+ cmd->buf.complete = mlx5_fpga_cmd_send_complete;
+
+ spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+ /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
+ * to make sure commands are inserted to the tls->pending_cmds list
+ * and the command QP in the same order.
+ */
+ ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
+ if (likely(!ret))
+ list_add_tail(&cmd->list, &tls->pending_cmds);
+ else
+ complete(tls->conn, fdev, cmd, NULL);
+ spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+}
+
+/* Start of context identifiers range (inclusive) */
+#define SWID_START 0
+/* End of context identifiers range (exclusive) */
+#define SWID_END BIT(24)
+
+static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
+ void *ptr)
+{
+ int ret;
+
+ /* TLS metadata format is 1 byte for syndrome followed
+ * by 3 bytes of swid (software ID)
+ * swid must not exceed 3 bytes.
+ * See tls_rxtx.c:insert_pet() for details
+ */
+ BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irq(idr_spinlock);
+ ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
+ spin_unlock_irq(idr_spinlock);
+ idr_preload_end();
+
+ return ret;
+}
+
+static void mlx5_fpga_tls_release_swid(struct idr *idr,
+ spinlock_t *idr_spinlock, u32 swid)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(idr_spinlock, flags);
+ idr_remove(idr, swid);
+ spin_unlock_irqrestore(idr_spinlock, flags);
+}
+
+struct mlx5_teardown_stream_context {
+ struct mlx5_fpga_tls_command_context cmd;
+ u32 swid;
+};
+
+static void
+mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_teardown_stream_context *ctx =
+ container_of(cmd, struct mlx5_teardown_stream_context, cmd);
+
+ if (resp) {
+ u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+
+ if (syndrome)
+ mlx5_fpga_err(fdev,
+ "Teardown stream failed with syndrome = %d",
+ syndrome);
+ else
+ mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
+ &fdev->tls->idr_spinlock,
+ ctx->swid);
+ }
+ mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
+{
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
+ MLX5_BYTE_OFF(tls_flow, ipv6));
+
+ MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
+ MLX5_SET(tls_cmd, cmd, direction_sx,
+ MLX5_GET(tls_flow, flow, direction_sx));
+}
+
+void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow,
+ u32 swid, gfp_t flags)
+{
+ struct mlx5_teardown_stream_context *ctx;
+ struct mlx5_fpga_dma_buf *buf;
+ void *cmd;
+
+ ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
+ if (!ctx)
+ return;
+
+ buf = &ctx->cmd.buf;
+ cmd = (ctx + 1);
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
+ MLX5_SET(tls_cmd, cmd, swid, swid);
+
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ kfree(flow);
+
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+
+ ctx->swid = swid;
+ mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_teardown_completion);
+}
+
+void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
+ gfp_t flags)
+{
+ struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+ void *flow;
+
+ rcu_read_lock();
+ flow = idr_find(&tls->tx_idr, swid);
+ rcu_read_unlock();
+
+ if (!flow) {
+ mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
+ swid);
+ return;
+ }
+
+ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
+}
+
+enum mlx5_fpga_setup_stream_status {
+ MLX5_FPGA_CMD_PENDING,
+ MLX5_FPGA_CMD_SEND_FAILED,
+ MLX5_FPGA_CMD_RESPONSE_RECEIVED,
+ MLX5_FPGA_CMD_ABANDONED,
+};
+
+struct mlx5_setup_stream_context {
+ struct mlx5_fpga_tls_command_context cmd;
+ atomic_t status;
+ u32 syndrome;
+ struct completion comp;
+};
+
+static void
+mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_setup_stream_context *ctx =
+ container_of(cmd, struct mlx5_setup_stream_context, cmd);
+ int status = MLX5_FPGA_CMD_SEND_FAILED;
+ void *tls_cmd = ctx + 1;
+
+ /* If we failed to send to command resp == NULL */
+ if (resp) {
+ ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+ status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
+ }
+
+ status = atomic_xchg_release(&ctx->status, status);
+ if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
+ complete(&ctx->comp);
+ return;
+ }
+
+ mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
+ ctx->syndrome);
+
+ if (!ctx->syndrome) {
+ /* The process was killed while waiting for the context to be
+ * added, and the add completed successfully.
+ * We need to destroy the HW context, and we can't can't reuse
+ * the command context because we might not have received
+ * the tx completion yet.
+ */
+ mlx5_fpga_tls_del_tx_flow(fdev->mdev,
+ MLX5_GET(tls_cmd, tls_cmd, swid),
+ GFP_ATOMIC);
+ }
+
+ mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
+ struct mlx5_setup_stream_context *ctx)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ void *cmd = ctx + 1;
+ int status, ret = 0;
+
+ buf = &ctx->cmd.buf;
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
+
+ init_completion(&ctx->comp);
+ atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
+ ctx->syndrome = -1;
+
+ mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_setup_completion);
+ wait_for_completion_killable(&ctx->comp);
+
+ status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
+ if (unlikely(status == MLX5_FPGA_CMD_PENDING))
+ /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
+ return -EINTR;
+
+ if (unlikely(ctx->syndrome))
+ ret = -ENOMEM;
+
+ mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
+ return ret;
+}
+
+static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
+
+ mlx5_fpga_tls_cmd_complete(fdev, buf);
+}
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+ MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
+ return false;
+
+ return true;
+}
+
+static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
+ u32 *p_caps)
+{
+ int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
+ u32 caps = 0;
+ void *buf;
+
+ buf = kzalloc(cap_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
+ if (err)
+ goto out;
+
+ if (MLX5_GET(tls_extended_cap, buf, tx))
+ caps |= MLX5_ACCEL_TLS_TX;
+ if (MLX5_GET(tls_extended_cap, buf, rx))
+ caps |= MLX5_ACCEL_TLS_RX;
+ if (MLX5_GET(tls_extended_cap, buf, tls_v12))
+ caps |= MLX5_ACCEL_TLS_V12;
+ if (MLX5_GET(tls_extended_cap, buf, tls_v13))
+ caps |= MLX5_ACCEL_TLS_V13;
+ if (MLX5_GET(tls_extended_cap, buf, lro))
+ caps |= MLX5_ACCEL_TLS_LRO;
+ if (MLX5_GET(tls_extended_cap, buf, ipv6))
+ caps |= MLX5_ACCEL_TLS_IPV6;
+
+ if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
+ caps |= MLX5_ACCEL_TLS_AES_GCM128;
+ if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
+ caps |= MLX5_ACCEL_TLS_AES_GCM256;
+
+ *p_caps = caps;
+ err = 0;
+out:
+ kfree(buf);
+ return err;
+}
+
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_conn_attr init_attr = {0};
+ struct mlx5_fpga_conn *conn;
+ struct mlx5_fpga_tls *tls;
+ int err = 0;
+
+ if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
+ if (err)
+ goto error;
+
+ if (!(tls->caps & (MLX5_ACCEL_TLS_TX | MLX5_ACCEL_TLS_V12 |
+ MLX5_ACCEL_TLS_AES_GCM128))) {
+ err = -ENOTSUPP;
+ goto error;
+ }
+
+ init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
+ init_attr.cb_arg = fdev;
+ conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+ if (IS_ERR(conn)) {
+ err = PTR_ERR(conn);
+ mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
+ err);
+ goto error;
+ }
+
+ tls->conn = conn;
+ spin_lock_init(&tls->pending_cmds_lock);
+ INIT_LIST_HEAD(&tls->pending_cmds);
+
+ idr_init(&tls->tx_idr);
+ spin_lock_init(&tls->idr_spinlock);
+ fdev->tls = tls;
+ return 0;
+
+error:
+ kfree(tls);
+ return err;
+}
+
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!fdev || !fdev->tls)
+ return;
+
+ mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
+ kfree(fdev->tls);
+ fdev->tls = NULL;
+}
+
+static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
+ struct tls_crypto_info *info,
+ __be64 *rcd_sn)
+{
+ struct tls12_crypto_info_aes_gcm_128 *crypto_info =
+ (struct tls12_crypto_info_aes_gcm_128 *)info;
+
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
+ crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
+ crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+ /* in AES-GCM 128 we need to write the key twice */
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE,
+ crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+ MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
+}
+
+static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
+ struct tls_crypto_info *crypto_info)
+{
+ __be64 rcd_sn;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
+ return -EINVAL;
+ mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info, u32 swid,
+ u32 tcp_sn)
+{
+ u32 caps = mlx5_fpga_tls_device_caps(mdev);
+ struct mlx5_setup_stream_context *ctx;
+ int ret = -ENOMEM;
+ size_t cmd_size;
+ void *cmd;
+
+ cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
+ ctx = kzalloc(cmd_size, GFP_KERNEL);
+ if (!ctx)
+ goto out;
+
+ cmd = ctx + 1;
+ ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
+ if (ret)
+ goto free_ctx;
+
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+ MLX5_SET(tls_cmd, cmd, swid, swid);
+ MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
+
+ return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
+
+free_ctx:
+ kfree(ctx);
+out:
+ return ret;
+}
+
+int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid)
+{
+ struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+ int ret = -ENOMEM;
+ u32 swid;
+
+ ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, &tls->idr_spinlock, flow);
+ if (ret < 0)
+ return ret;
+
+ swid = ret;
+ MLX5_SET(tls_flow, flow, direction_sx, 1);
+
+ ret = mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+ start_offload_tcp_sn);
+ if (ret && ret != -EINTR)
+ goto free_swid;
+
+ *p_swid = swid;
+ return 0;
+free_swid:
+ mlx5_fpga_tls_release_swid(&tls->tx_idr, &tls->idr_spinlock, swid);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
new file mode 100644
index 000000000000..800a214e4e49
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_TLS_H__
+#define __MLX5_FPGA_TLS_H__
+
+#include <linux/mlx5/driver.h>
+
+#include <net/tls.h>
+#include "fpga/core.h"
+
+struct mlx5_fpga_tls {
+ struct list_head pending_cmds;
+ spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+ u32 caps;
+ struct mlx5_fpga_conn *conn;
+
+ struct idr tx_idr;
+ spinlock_t idr_spinlock; /* protects the IDR */
+};
+
+int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid);
+
+void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
+ gfp_t flags);
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
+
+static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mdev->fpga->tls->caps;
+}
+
+#endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 63a8ea31601c..b865597630ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -60,6 +60,7 @@
#include "fpga/core.h"
#include "fpga/ipsec.h"
#include "accel/ipsec.h"
+#include "accel/tls.h"
#include "lib/clock.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -1190,6 +1191,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_ipsec_start;
}
+ err = mlx5_accel_tls_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+ goto err_tls_start;
+ }
+
err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1231,6 +1238,9 @@ err_sriov:
mlx5_cleanup_fs(dev);
err_fs:
+ mlx5_accel_tls_cleanup(dev);
+
+err_tls_start:
mlx5_accel_ipsec_cleanup(dev);
err_ipsec_start:
@@ -1306,6 +1316,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
mlx5_accel_ipsec_cleanup(dev);
+ mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 479511cf79bc..8da91b023b13 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -424,10 +424,15 @@ MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_rdq_sz, 0x04, 24, 8);
MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_rdqs, 0x04, 0, 8);
/* cmd_mbox_query_aq_cap_log_max_cq_sz
- * Log (base 2) of max CQEs allowed on CQ.
+ * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv0 and CQEv1.
*/
MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cq_sz, 0x08, 24, 8);
+/* cmd_mbox_query_aq_cap_log_max_cqv2_sz
+ * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv2.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cqv2_sz, 0x08, 16, 8);
+
/* cmd_mbox_query_aq_cap_max_num_cqs
* Maximum number of CQs.
*/
@@ -662,6 +667,12 @@ MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);
*/
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
+/* cmd_mbox_config_set_cqe_version
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1);
+
/* cmd_mbox_config_profile_max_vepa_channels
* Maximum number of VEPA channels per port (0 through 16)
* 0 - multi-channel VEPA is disabled
@@ -841,6 +852,14 @@ MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_type,
MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_properties,
0x60, 0, 8, 0x08, 0x00, false);
+/* cmd_mbox_config_profile_cqe_version
+ * CQE version:
+ * 0: CQE version is 0
+ * 1: CQE version is either 1 or 2
+ * CQE ver 1 or 2 is configured by Completion Queue Context field cqe_ver.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, cqe_version, 0xB0, 0, 8);
+
/* ACCESS_REG - Access EMAD Supported Register
* ----------------------------------
* OpMod == 0 (N/A), INMmod == 0 (N/A)
@@ -1032,11 +1051,15 @@ static inline int mlxsw_cmd_sw2hw_cq(struct mlxsw_core *mlxsw_core,
0, cq_number, in_mbox, MLXSW_CMD_MBOX_SIZE);
}
-/* cmd_mbox_sw2hw_cq_cv
+enum mlxsw_cmd_mbox_sw2hw_cq_cqe_ver {
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1,
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2,
+};
+
+/* cmd_mbox_sw2hw_cq_cqe_ver
* CQE Version.
- * 0 - CQE Version 0, 1 - CQE Version 1
*/
-MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4);
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cqe_ver, 0x00, 28, 4);
/* cmd_mbox_sw2hw_cq_c_eqn
* Event Queue this CQ reports completion events to.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 3a9381977d6d..db794a1a3a7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -117,6 +117,7 @@ struct mlxsw_pci_queue {
struct {
u32 comp_sdq_count;
u32 comp_rdq_count;
+ enum mlxsw_pci_cqe_v v;
} cq;
struct {
u32 ev_cmd_count;
@@ -155,6 +156,8 @@ struct mlxsw_pci {
} cmd;
struct mlxsw_bus_info bus_info;
const struct pci_device_id *id;
+ enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
+ u8 num_sdq_cqs; /* Number of CQs used for SDQs */
};
static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
@@ -202,24 +205,6 @@ static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit)
return owner_bit != !!(q->consumer_counter & q->count);
}
-static char *
-mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q,
- u32 (*get_elem_owner_func)(const char *))
-{
- struct mlxsw_pci_queue_elem_info *elem_info;
- char *elem;
- bool owner_bit;
-
- elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
- elem = elem_info->elem;
- owner_bit = get_elem_owner_func(elem);
- if (mlxsw_pci_elem_hw_owned(q, owner_bit))
- return NULL;
- q->consumer_counter++;
- rmb(); /* make sure we read owned bit before the rest of elem */
- return elem;
-}
-
static struct mlxsw_pci_queue_type_group *
mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci,
enum mlxsw_pci_queue_type q_type)
@@ -494,6 +479,17 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
}
}
+static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ q->u.cq.v = mlxsw_pci->max_cqe_ver;
+
+ /* For SDQ it is pointless to use CQEv2, so use CQEv1 instead */
+ if (q->u.cq.v == MLXSW_PCI_CQE_V2 &&
+ q->num < mlxsw_pci->num_sdq_cqs)
+ q->u.cq.v = MLXSW_PCI_CQE_V1;
+}
+
static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
struct mlxsw_pci_queue *q)
{
@@ -505,10 +501,16 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
for (i = 0; i < q->count; i++) {
char *elem = mlxsw_pci_queue_elem_get(q, i);
- mlxsw_pci_cqe_owner_set(elem, 1);
+ mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1);
}
- mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */
+ if (q->u.cq.v == MLXSW_PCI_CQE_V1)
+ mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
+ else if (q->u.cq.v == MLXSW_PCI_CQE_V2)
+ mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
+
mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
@@ -559,7 +561,7 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue *q,
u16 consumer_counter_limit,
- char *cqe)
+ enum mlxsw_pci_cqe_v cqe_v, char *cqe)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
@@ -579,10 +581,11 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
- if (mlxsw_pci_cqe_lag_get(cqe)) {
+ if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
rx_info.is_lag = true;
- rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe);
- rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe);
+ rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
+ rx_info.lag_port_index =
+ mlxsw_pci_cqe_lag_subport_get(cqe_v, cqe);
} else {
rx_info.is_lag = false;
rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
@@ -591,7 +594,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
- if (mlxsw_pci_cqe_crc_get(cqe))
+ if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
byte_count -= ETH_FCS_LEN;
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
@@ -608,7 +611,18 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
{
- return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_cqe_owner_get);
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *elem;
+ bool owner_bit;
+
+ elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ elem = elem_info->elem;
+ owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem);
+ if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+ return NULL;
+ q->consumer_counter++;
+ rmb(); /* make sure we read owned bit before the rest of elem */
+ return elem;
}
static void mlxsw_pci_cq_tasklet(unsigned long data)
@@ -621,8 +635,8 @@ static void mlxsw_pci_cq_tasklet(unsigned long data)
while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
- u8 sendq = mlxsw_pci_cqe_sr_get(cqe);
- u8 dqn = mlxsw_pci_cqe_dqn_get(cqe);
+ u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
+ u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
if (sendq) {
struct mlxsw_pci_queue *sdq;
@@ -636,7 +650,7 @@ static void mlxsw_pci_cq_tasklet(unsigned long data)
rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn);
mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
- wqe_counter, cqe);
+ wqe_counter, q->u.cq.v, cqe);
q->u.cq.comp_rdq_count++;
}
if (++items == credits)
@@ -648,6 +662,18 @@ static void mlxsw_pci_cq_tasklet(unsigned long data)
}
}
+static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
+{
+ return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT :
+ MLXSW_PCI_CQE01_COUNT;
+}
+
+static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q)
+{
+ return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE :
+ MLXSW_PCI_CQE01_SIZE;
+}
+
static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
struct mlxsw_pci_queue *q)
{
@@ -696,7 +722,18 @@ static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe)
static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
{
- return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_eqe_owner_get);
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *elem;
+ bool owner_bit;
+
+ elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ elem = elem_info->elem;
+ owner_bit = mlxsw_pci_eqe_owner_get(elem);
+ if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+ return NULL;
+ q->consumer_counter++;
+ rmb(); /* make sure we read owned bit before the rest of elem */
+ return elem;
}
static void mlxsw_pci_eq_tasklet(unsigned long data)
@@ -749,11 +786,15 @@ static void mlxsw_pci_eq_tasklet(unsigned long data)
struct mlxsw_pci_queue_ops {
const char *name;
enum mlxsw_pci_queue_type type;
+ void (*pre_init)(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q);
int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox,
struct mlxsw_pci_queue *q);
void (*fini)(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue *q);
void (*tasklet)(unsigned long data);
+ u16 (*elem_count_f)(const struct mlxsw_pci_queue *q);
+ u8 (*elem_size_f)(const struct mlxsw_pci_queue *q);
u16 elem_count;
u8 elem_size;
};
@@ -776,11 +817,12 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = {
static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
.type = MLXSW_PCI_QUEUE_TYPE_CQ,
+ .pre_init = mlxsw_pci_cq_pre_init,
.init = mlxsw_pci_cq_init,
.fini = mlxsw_pci_cq_fini,
.tasklet = mlxsw_pci_cq_tasklet,
- .elem_count = MLXSW_PCI_CQE_COUNT,
- .elem_size = MLXSW_PCI_CQE_SIZE
+ .elem_count_f = mlxsw_pci_cq_elem_count,
+ .elem_size_f = mlxsw_pci_cq_elem_size
};
static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
@@ -800,10 +842,15 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
int i;
int err;
- spin_lock_init(&q->lock);
q->num = q_num;
- q->count = q_ops->elem_count;
- q->elem_size = q_ops->elem_size;
+ if (q_ops->pre_init)
+ q_ops->pre_init(mlxsw_pci, q);
+
+ spin_lock_init(&q->lock);
+ q->count = q_ops->elem_count_f ? q_ops->elem_count_f(q) :
+ q_ops->elem_count;
+ q->elem_size = q_ops->elem_size_f ? q_ops->elem_size_f(q) :
+ q_ops->elem_size;
q->type = q_ops->type;
q->pci = mlxsw_pci;
@@ -832,7 +879,7 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
elem_info = mlxsw_pci_queue_elem_info_get(q, i);
elem_info->elem =
- __mlxsw_pci_queue_elem_get(q, q_ops->elem_size, i);
+ __mlxsw_pci_queue_elem_get(q, q->elem_size, i);
}
mlxsw_cmd_mbox_zero(mbox);
@@ -912,6 +959,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
u8 rdq_log2sz;
u8 num_cqs;
u8 cq_log2sz;
+ u8 cqv2_log2sz;
u8 num_eqs;
u8 eq_log2sz;
int err;
@@ -927,6 +975,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox);
num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox);
cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox);
+ cqv2_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cqv2_sz_get(mbox);
num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox);
eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
@@ -938,12 +987,16 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
(1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
- (1 << cq_log2sz != MLXSW_PCI_CQE_COUNT) ||
+ (1 << cq_log2sz != MLXSW_PCI_CQE01_COUNT) ||
+ (mlxsw_pci->max_cqe_ver == MLXSW_PCI_CQE_V2 &&
+ (1 << cqv2_log2sz != MLXSW_PCI_CQE2_COUNT)) ||
(1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) {
dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n");
return -EINVAL;
}
+ mlxsw_pci->num_sdq_cqs = num_sdqs;
+
err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops,
num_eqs);
if (err) {
@@ -1184,6 +1237,11 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
&profile->swid_config[i]);
+ if (mlxsw_pci->max_cqe_ver > MLXSW_PCI_CQE_V0) {
+ mlxsw_cmd_mbox_config_profile_set_cqe_version_set(mbox, 1);
+ mlxsw_cmd_mbox_config_profile_cqe_version_set(mbox, 1);
+ }
+
return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
}
@@ -1378,6 +1436,21 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (err)
goto err_query_resources;
+ if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V2) &&
+ MLXSW_CORE_RES_GET(mlxsw_core, CQE_V2))
+ mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V2;
+ else if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V1) &&
+ MLXSW_CORE_RES_GET(mlxsw_core, CQE_V1))
+ mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V1;
+ else if ((MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0) &&
+ MLXSW_CORE_RES_GET(mlxsw_core, CQE_V0)) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0)) {
+ mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V0;
+ } else {
+ dev_err(&pdev->dev, "Invalid supported CQE version combination reported\n");
+ goto err_cqe_v_check;
+ }
+
err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res);
if (err)
goto err_config_profile;
@@ -1400,6 +1473,7 @@ err_request_eq_irq:
mlxsw_pci_aqs_fini(mlxsw_pci);
err_aqs_init:
err_config_profile:
+err_cqe_v_check:
err_query_resources:
err_boardinfo:
mlxsw_pci_fw_area_fini(mlxsw_pci);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index fb082ad21b00..963155f6a17a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -82,10 +82,12 @@
#define MLXSW_PCI_AQ_PAGES 8
#define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
#define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */
-#define MLXSW_PCI_CQE_SIZE 16 /* 16 bytes per element */
+#define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */
+#define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */
#define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */
#define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
-#define MLXSW_PCI_CQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
+#define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE)
+#define MLXSW_PCI_CQE2_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE2_SIZE)
#define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
#define MLXSW_PCI_EQE_UPDATE_COUNT 0x80
@@ -126,10 +128,48 @@ MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
*/
MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
+enum mlxsw_pci_cqe_v {
+ MLXSW_PCI_CQE_V0,
+ MLXSW_PCI_CQE_V1,
+ MLXSW_PCI_CQE_V2,
+};
+
+#define mlxsw_pci_cqe_item_helpers(name, v0, v1, v2) \
+static inline u32 mlxsw_pci_cqe_##name##_get(enum mlxsw_pci_cqe_v v, char *cqe) \
+{ \
+ switch (v) { \
+ default: \
+ case MLXSW_PCI_CQE_V0: \
+ return mlxsw_pci_cqe##v0##_##name##_get(cqe); \
+ case MLXSW_PCI_CQE_V1: \
+ return mlxsw_pci_cqe##v1##_##name##_get(cqe); \
+ case MLXSW_PCI_CQE_V2: \
+ return mlxsw_pci_cqe##v2##_##name##_get(cqe); \
+ } \
+} \
+static inline void mlxsw_pci_cqe_##name##_set(enum mlxsw_pci_cqe_v v, \
+ char *cqe, u32 val) \
+{ \
+ switch (v) { \
+ default: \
+ case MLXSW_PCI_CQE_V0: \
+ mlxsw_pci_cqe##v0##_##name##_set(cqe, val); \
+ break; \
+ case MLXSW_PCI_CQE_V1: \
+ mlxsw_pci_cqe##v1##_##name##_set(cqe, val); \
+ break; \
+ case MLXSW_PCI_CQE_V2: \
+ mlxsw_pci_cqe##v2##_##name##_set(cqe, val); \
+ break; \
+ } \
+}
+
/* pci_cqe_lag
* Packet arrives from a port which is a LAG
*/
-MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
+MLXSW_ITEM32(pci, cqe0, lag, 0x00, 23, 1);
+MLXSW_ITEM32(pci, cqe12, lag, 0x00, 24, 1);
+mlxsw_pci_cqe_item_helpers(lag, 0, 12, 12);
/* pci_cqe_system_port/lag_id
* When lag=0: System port on which the packet was received
@@ -138,8 +178,12 @@ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
* bits [3:0] sub_port on which the packet was received
*/
MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
-MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
-MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
+MLXSW_ITEM32(pci, cqe0, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe12, lag_id, 0x00, 0, 16);
+mlxsw_pci_cqe_item_helpers(lag_id, 0, 12, 12);
+MLXSW_ITEM32(pci, cqe0, lag_subport, 0x00, 0, 4);
+MLXSW_ITEM32(pci, cqe12, lag_subport, 0x00, 16, 8);
+mlxsw_pci_cqe_item_helpers(lag_subport, 0, 12, 12);
/* pci_cqe_wqe_counter
* WQE count of the WQEs completed on the associated dqn
@@ -162,28 +206,38 @@ MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 9);
* Length include CRC. Indicates the length field includes
* the packet's CRC.
*/
-MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1);
+MLXSW_ITEM32(pci, cqe0, crc, 0x0C, 8, 1);
+MLXSW_ITEM32(pci, cqe12, crc, 0x0C, 9, 1);
+mlxsw_pci_cqe_item_helpers(crc, 0, 12, 12);
/* pci_cqe_e
* CQE with Error.
*/
-MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
+MLXSW_ITEM32(pci, cqe0, e, 0x0C, 7, 1);
+MLXSW_ITEM32(pci, cqe12, e, 0x00, 27, 1);
+mlxsw_pci_cqe_item_helpers(e, 0, 12, 12);
/* pci_cqe_sr
* 1 - Send Queue
* 0 - Receive Queue
*/
-MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
+MLXSW_ITEM32(pci, cqe0, sr, 0x0C, 6, 1);
+MLXSW_ITEM32(pci, cqe12, sr, 0x00, 26, 1);
+mlxsw_pci_cqe_item_helpers(sr, 0, 12, 12);
/* pci_cqe_dqn
* Descriptor Queue (DQ) Number.
*/
-MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
+MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5);
+MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6);
+mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12);
/* pci_cqe_owner
* Ownership bit.
*/
-MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
+MLXSW_ITEM32(pci, cqe01, owner, 0x0C, 0, 1);
+MLXSW_ITEM32(pci, cqe2, owner, 0x1C, 0, 1);
+mlxsw_pci_cqe_item_helpers(owner, 01, 01, 2);
/* pci_eqe_event_type
* Event type.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 087aad52c195..fd9299ccec72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -43,6 +43,9 @@ enum mlxsw_res_id {
MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
MLXSW_RES_ID_MAX_TRAP_GROUPS,
+ MLXSW_RES_ID_CQE_V0,
+ MLXSW_RES_ID_CQE_V1,
+ MLXSW_RES_ID_CQE_V2,
MLXSW_RES_ID_COUNTER_POOL_SIZE,
MLXSW_RES_ID_MAX_SPAN,
MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES,
@@ -81,6 +84,9 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
[MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
[MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
+ [MLXSW_RES_ID_CQE_V0] = 0x2210,
+ [MLXSW_RES_ID_CQE_V1] = 0x2211,
+ [MLXSW_RES_ID_CQE_V2] = 0x2212,
[MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410,
[MLXSW_RES_ID_MAX_SPAN] = 0x2420,
[MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ca38a30fbe91..94132f6cec61 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -441,29 +441,29 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
}
-int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
- u8 state)
+enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
- enum mlxsw_reg_spms_state spms_state;
- char *spms_pl;
- int err;
-
switch (state) {
case BR_STATE_FORWARDING:
- spms_state = MLXSW_REG_SPMS_STATE_FORWARDING;
- break;
+ return MLXSW_REG_SPMS_STATE_FORWARDING;
case BR_STATE_LEARNING:
- spms_state = MLXSW_REG_SPMS_STATE_LEARNING;
- break;
+ return MLXSW_REG_SPMS_STATE_LEARNING;
case BR_STATE_LISTENING: /* fall-through */
case BR_STATE_DISABLED: /* fall-through */
case BR_STATE_BLOCKING:
- spms_state = MLXSW_REG_SPMS_STATE_DISCARDING;
- break;
+ return MLXSW_REG_SPMS_STATE_DISCARDING;
default:
BUG();
}
+}
+
+int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+ u8 state)
+{
+ enum mlxsw_reg_spms_state spms_state = mlxsw_sp_stp_spms_state(state);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char *spms_pl;
+ int err;
spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
if (!spms_pl)
@@ -3666,6 +3666,15 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_lag_init;
}
+ /* Initialize SPAN before router and switchdev, so that those components
+ * can call mlxsw_sp_span_respin().
+ */
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
err = mlxsw_sp_switchdev_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n");
@@ -3684,15 +3693,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
- err = mlxsw_sp_span_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
- goto err_span_init;
- }
-
- /* Initialize router after SPAN is initialized, so that the FIB and
- * neighbor event handlers can issue SPAN respin.
- */
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
@@ -3739,14 +3739,14 @@ err_acl_init:
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
- mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
err_counter_pool_init:
mlxsw_sp_switchdev_fini(mlxsw_sp);
err_switchdev_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
mlxsw_sp_lag_fini(mlxsw_sp);
err_lag_init:
mlxsw_sp_buffers_fini(mlxsw_sp);
@@ -3768,10 +3768,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_acl_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
- mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_lag_fini(mlxsw_sp);
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 804d4d2c8031..4a519d8edec8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -364,6 +364,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 maxrate);
+enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state);
int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
u8 state);
int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1904c0323d39..8028d221aece 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry {
struct mlxsw_sp_rt6 {
struct list_head list;
- struct rt6_info *rt;
+ struct fib6_info *rt;
};
struct mlxsw_sp_lpm_tree {
@@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
struct in6_addr *gw;
int ifindex, weight;
- ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
- weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
- gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
+ ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
+ weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
+ gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
weight))
return false;
@@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
struct net_device *dev;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
- dev = mlxsw_sp_rt6->rt->dst.dev;
+ dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
val ^= dev->ifindex;
}
@@ -3834,11 +3834,11 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
- struct rt6_info *rt = mlxsw_sp_rt6->rt;
+ struct fib6_info *rt = mlxsw_sp_rt6->rt;
- if (nh->rif && nh->rif->dev == rt->dst.dev &&
+ if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
- &rt->rt6i_gateway))
+ &rt->fib6_nh.nh_gw))
return nh;
continue;
}
@@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
- list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+ list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
return;
}
@@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
if (nh && nh->offloaded)
- mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+ mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
else
- mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+ mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
}
}
@@ -3920,9 +3920,9 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
common);
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
- struct rt6_info *rt = mlxsw_sp_rt6->rt;
+ struct fib6_info *rt = mlxsw_sp_rt6->rt;
- rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+ rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
}
}
@@ -4699,29 +4699,29 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
-static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
{
/* Packets with link-local destination IP arriving to the router
* are trapped to the CPU, so no need to program specific routes
* for them.
*/
- if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+ if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
return true;
/* Multicast routes aren't supported, so ignore them. Neighbour
* Discovery packets are specifically trapped.
*/
- if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+ if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
return true;
/* Cloned routes are irrelevant in the forwarding path. */
- if (rt->rt6i_flags & RTF_CACHE)
+ if (rt->fib6_flags & RTF_CACHE)
return true;
return false;
}
-static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
@@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
* memory.
*/
mlxsw_sp_rt6->rt = rt;
- rt6_hold(rt);
+ fib6_info_hold(rt);
return mlxsw_sp_rt6;
}
#if IS_ENABLED(CONFIG_IPV6)
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
- rt6_release(rt);
+ fib6_info_release(rt);
}
#else
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
}
#endif
@@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
kfree(mlxsw_sp_rt6);
}
-static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
- return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+ return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}
-static struct rt6_info *
+static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
@@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct rt6_info *nrt, bool replace)
+ const struct fib6_info *nrt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
@@ -4779,21 +4779,21 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
return NULL;
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
- struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+ struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
* virtual router.
*/
- if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
continue;
- if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
break;
- if (rt->rt6i_metric < nrt->rt6i_metric)
+ if (rt->fib6_metric < nrt->fib6_metric)
continue;
- if (rt->rt6i_metric == nrt->rt6i_metric &&
+ if (rt->fib6_metric == nrt->fib6_metric &&
mlxsw_sp_fib6_rt_can_mp(rt))
return fib6_entry;
- if (rt->rt6i_metric > nrt->rt6i_metric)
+ if (rt->fib6_metric > nrt->fib6_metric)
break;
}
@@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
- const struct rt6_info *rt)
+ const struct fib6_info *rt)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
@@ -4815,21 +4815,21 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
}
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
- const struct rt6_info *rt,
+ const struct fib6_info *rt,
enum mlxsw_sp_ipip_type *ret)
{
- return rt->dst.dev &&
- mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
+ return rt->fib6_nh.nh_dev &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
}
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh,
- const struct rt6_info *rt)
+ const struct fib6_info *rt)
{
const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
- struct net_device *dev = rt->dst.dev;
+ struct net_device *dev = rt->fib6_nh.nh_dev;
struct mlxsw_sp_rif *rif;
int err;
@@ -4870,13 +4870,13 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh,
- const struct rt6_info *rt)
+ const struct fib6_info *rt)
{
- struct net_device *dev = rt->dst.dev;
+ struct net_device *dev = rt->fib6_nh.nh_dev;
nh->nh_grp = nh_grp;
- nh->nh_weight = rt->rt6i_nh_weight;
- memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+ nh->nh_weight = rt->fib6_nh.nh_weight;
+ memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -4897,9 +4897,9 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
}
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
- const struct rt6_info *rt)
+ const struct fib6_info *rt)
{
- return rt->rt6i_flags & RTF_GATEWAY ||
+ return rt->fib6_flags & RTF_GATEWAY ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
@@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
nh_grp->count = fib6_entry->nrt6;
for (i = 0; i < nh_grp->count; i++) {
- struct rt6_info *rt = mlxsw_sp_rt6->rt;
+ struct fib6_info *rt = mlxsw_sp_rt6->rt;
nh = &nh_grp->nexthops[i];
err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
@@ -5040,7 +5040,7 @@ err_nexthop6_group_get:
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry,
- struct rt6_info *rt)
+ struct fib6_info *rt)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
int err;
@@ -5068,7 +5068,7 @@ err_nexthop6_group_update:
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib6_entry *fib6_entry,
- struct rt6_info *rt)
+ struct fib6_info *rt)
{
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
@@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
- const struct rt6_info *rt)
+ const struct fib6_info *rt)
{
/* Packets hitting RTF_REJECT routes need to be discarded by the
* stack. We can rely on their destination device not having a
@@ -5092,9 +5092,9 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
* local, which will cause them to be trapped with a lower
* priority than packets that need to be locally received.
*/
- if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
- else if (rt->rt6i_flags & RTF_REJECT)
+ else if (rt->fib6_flags & RTF_REJECT)
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
@@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node,
- struct rt6_info *rt)
+ struct fib6_info *rt)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_entry *fib_entry;
@@ -5168,25 +5168,25 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct rt6_info *nrt, bool replace)
+ const struct fib6_info *nrt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
- struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+ struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
- if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
continue;
- if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
break;
- if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+ if (replace && rt->fib6_metric == nrt->fib6_metric) {
if (mlxsw_sp_fib6_rt_can_mp(rt) ==
mlxsw_sp_fib6_rt_can_mp(nrt))
return fib6_entry;
if (mlxsw_sp_fib6_rt_can_mp(nrt))
fallback = fallback ?: fib6_entry;
}
- if (rt->rt6i_metric > nrt->rt6i_metric)
+ if (rt->fib6_metric > nrt->fib6_metric)
return fallback ?: fib6_entry;
}
@@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
bool replace)
{
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
- struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+ struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
struct mlxsw_sp_fib6_entry *fib6_entry;
fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
@@ -5213,9 +5213,9 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
struct mlxsw_sp_fib6_entry *last;
list_for_each_entry(last, &fib_node->entry_list, common.list) {
- struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+ struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
- if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
+ if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
break;
fib6_entry = last;
}
@@ -5268,29 +5268,29 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
- const struct rt6_info *rt)
+ const struct fib6_info *rt)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
struct mlxsw_sp_fib *fib;
struct mlxsw_sp_vr *vr;
- vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
+ vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
if (!vr)
return NULL;
fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
- fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
- sizeof(rt->rt6i_dst.addr),
- rt->rt6i_dst.plen);
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
+ sizeof(rt->fib6_dst.addr),
+ rt->fib6_dst.plen);
if (!fib_node)
return NULL;
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
- struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+ struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
- if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
- rt->rt6i_metric == iter_rt->rt6i_metric &&
+ if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
+ rt->fib6_metric == iter_rt->fib6_metric &&
mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
return fib6_entry;
}
@@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
}
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
- struct rt6_info *rt, bool replace)
+ struct fib6_info *rt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
@@ -5325,16 +5325,16 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
if (mlxsw_sp->router->aborted)
return 0;
- if (rt->rt6i_src.plen)
+ if (rt->fib6_src.plen)
return -EINVAL;
if (mlxsw_sp_fib6_rt_should_ignore(rt))
return 0;
- fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
- &rt->rt6i_dst.addr,
- sizeof(rt->rt6i_dst.addr),
- rt->rt6i_dst.plen,
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
+ &rt->fib6_dst.addr,
+ sizeof(rt->fib6_dst.addr),
+ rt->fib6_dst.plen,
MLXSW_SP_L3_PROTO_IPV6);
if (IS_ERR(fib_node))
return PTR_ERR(fib_node);
@@ -5373,7 +5373,7 @@ err_fib6_entry_nexthop_add:
}
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
- struct rt6_info *rt)
+ struct fib6_info *rt)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
@@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
fen6_info = container_of(info, struct fib6_entry_notifier_info,
info);
fib_work->fen6_info = *fen6_info;
- rt6_hold(fib_work->fen6_info.rt);
+ fib6_info_hold(fib_work->fen6_info.rt);
break;
}
}
@@ -5882,24 +5882,24 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
switch (info->family) {
case AF_INET:
if (!fib4_rule_default(rule) && !rule->l3mdev)
- err = -1;
+ err = -EOPNOTSUPP;
break;
case AF_INET6:
if (!fib6_rule_default(rule) && !rule->l3mdev)
- err = -1;
+ err = -EOPNOTSUPP;
break;
case RTNL_FAMILY_IPMR:
if (!ipmr_rule_default(rule) && !rule->l3mdev)
- err = -1;
+ err = -EOPNOTSUPP;
break;
case RTNL_FAMILY_IP6MR:
if (!ip6mr_rule_default(rule) && !rule->l3mdev)
- err = -1;
+ err = -EOPNOTSUPP;
break;
}
if (err < 0)
- NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
+ NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
return err;
}
@@ -5926,8 +5926,15 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
case FIB_EVENT_RULE_DEL:
err = mlxsw_sp_router_fib_rule_event(event, info,
router->mlxsw_sp);
- if (!err)
- return NOTIFY_DONE;
+ if (!err || info->extack)
+ return notifier_from_errno(err);
+ break;
+ case FIB_EVENT_ENTRY_ADD:
+ if (router->aborted) {
+ NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
+ return notifier_from_errno(-EINVAL);
+ }
+ break;
}
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 65a77708ff61..cd9071ee19ad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -32,6 +32,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/if_bridge.h>
#include <linux/list.h>
#include <net/arp.h>
#include <net/gre.h>
@@ -39,8 +40,9 @@
#include <net/ip6_tunnel.h>
#include "spectrum.h"
-#include "spectrum_span.h"
#include "spectrum_ipip.h"
+#include "spectrum_span.h"
+#include "spectrum_switchdev.h"
int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
{
@@ -167,6 +169,72 @@ mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
return 0;
}
+static struct net_device *
+mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev,
+ unsigned char *dmac,
+ u16 *p_vid)
+{
+ struct bridge_vlan_info vinfo;
+ struct net_device *edev;
+ u16 pvid;
+
+ if (WARN_ON(br_vlan_get_pvid(br_dev, &pvid)))
+ return NULL;
+ if (!pvid)
+ return NULL;
+
+ edev = br_fdb_find_port(br_dev, dmac, pvid);
+ if (!edev)
+ return NULL;
+
+ if (br_vlan_get_info(edev, pvid, &vinfo))
+ return NULL;
+ if (!(vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED))
+ *p_vid = pvid;
+ return edev;
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge_8021d(const struct net_device *br_dev,
+ unsigned char *dmac)
+{
+ return br_fdb_find_port(br_dev, dmac, 0);
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge(const struct net_device *br_dev,
+ unsigned char dmac[ETH_ALEN],
+ u16 *p_vid)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+ enum mlxsw_reg_spms_state spms_state;
+ struct mlxsw_sp_port *port;
+ struct net_device *dev;
+ u8 stp_state;
+
+ if (br_vlan_enabled(br_dev))
+ dev = mlxsw_sp_span_entry_bridge_8021q(br_dev, dmac, p_vid);
+ else
+ dev = mlxsw_sp_span_entry_bridge_8021d(br_dev, dmac);
+ if (!dev)
+ return NULL;
+
+ port = mlxsw_sp_port_dev_lower_find(dev);
+ if (!port)
+ return NULL;
+
+ bridge_port = mlxsw_sp_bridge_port_find(port->mlxsw_sp->bridge, dev);
+ if (!bridge_port)
+ return NULL;
+
+ stp_state = mlxsw_sp_bridge_port_stp_state(bridge_port);
+ spms_state = mlxsw_sp_stp_spms_state(stp_state);
+ if (spms_state != MLXSW_REG_SPMS_STATE_FORWARDING)
+ return NULL;
+
+ return dev;
+}
+
static __maybe_unused int
mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
union mlxsw_sp_l3addr saddr,
@@ -177,13 +245,22 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
struct mlxsw_sp_span_parms *sparmsp)
{
unsigned char dmac[ETH_ALEN];
+ u16 vid = 0;
if (mlxsw_sp_l3addr_is_zero(gw))
gw = daddr;
- if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
- mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
- return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+ if (!l3edev || mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+ goto unoffloadable;
+
+ if (netif_is_bridge_master(l3edev)) {
+ l3edev = mlxsw_sp_span_entry_bridge(l3edev, dmac, &vid);
+ if (!l3edev)
+ goto unoffloadable;
+ }
+
+ if (!mlxsw_sp_port_dev_check(l3edev))
+ goto unoffloadable;
sparmsp->dest_port = netdev_priv(l3edev);
sparmsp->ttl = ttl;
@@ -191,7 +268,11 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
sparmsp->saddr = saddr;
sparmsp->daddr = daddr;
+ sparmsp->vid = vid;
return 0;
+
+unoffloadable:
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
}
#if IS_ENABLED(CONFIG_NET_IPGRE)
@@ -268,9 +349,10 @@ mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
/* Create a new port analayzer entry for local_port. */
mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
- sparms.dmac, false);
+ sparms.dmac, !!sparms.vid);
mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
sparms.ttl, sparms.smac,
be32_to_cpu(sparms.saddr.addr4),
@@ -368,9 +450,10 @@ mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
/* Create a new port analayzer entry for local_port. */
mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
- sparms.dmac, false);
+ sparms.dmac, !!sparms.vid);
mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
sparms.saddr.addr6,
sparms.daddr.addr6);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 4b87ec20e658..14a6de904db1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -63,6 +63,7 @@ struct mlxsw_sp_span_parms {
unsigned char smac[ETH_ALEN];
union mlxsw_sp_l3addr daddr;
union mlxsw_sp_l3addr saddr;
+ u16 vid;
};
struct mlxsw_sp_span_entry_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 4ed01182a82c..8c9cf8ee9398 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -49,7 +49,9 @@
#include <linux/netlink.h>
#include <net/switchdev.h>
+#include "spectrum_span.h"
#include "spectrum_router.h"
+#include "spectrum_switchdev.h"
#include "spectrum.h"
#include "core.h"
#include "reg.h"
@@ -239,7 +241,7 @@ __mlxsw_sp_bridge_port_find(const struct mlxsw_sp_bridge_device *bridge_device,
return NULL;
}
-static struct mlxsw_sp_bridge_port *
+struct mlxsw_sp_bridge_port *
mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
struct net_device *brport_dev)
{
@@ -922,6 +924,9 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
break;
}
+ if (switchdev_trans_ph_commit(trans))
+ mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
return err;
}
@@ -1646,18 +1651,57 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
}
}
+struct mlxsw_sp_span_respin_work {
+ struct work_struct work;
+ struct mlxsw_sp *mlxsw_sp;
+};
+
+static void mlxsw_sp_span_respin_work(struct work_struct *work)
+{
+ struct mlxsw_sp_span_respin_work *respin_work =
+ container_of(work, struct mlxsw_sp_span_respin_work, work);
+
+ rtnl_lock();
+ mlxsw_sp_span_respin(respin_work->mlxsw_sp);
+ rtnl_unlock();
+ kfree(respin_work);
+}
+
+static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_span_respin_work *respin_work;
+
+ respin_work = kzalloc(sizeof(*respin_work), GFP_ATOMIC);
+ if (!respin_work)
+ return;
+
+ INIT_WORK(&respin_work->work, mlxsw_sp_span_respin_work);
+ respin_work->mlxsw_sp = mlxsw_sp;
+
+ mlxsw_core_schedule_work(&respin_work->work);
+}
+
static int mlxsw_sp_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ const struct switchdev_obj_port_vlan *vlan;
int err = 0;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = mlxsw_sp_port_vlans_add(mlxsw_sp_port,
- SWITCHDEV_OBJ_PORT_VLAN(obj),
- trans);
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans);
+
+ if (switchdev_trans_ph_commit(trans)) {
+ /* The event is emitted before the changes are actually
+ * applied to the bridge. Therefore schedule the respin
+ * call for later, so that the respin logic sees the
+ * updated bridge state.
+ */
+ mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp);
+ }
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = mlxsw_sp_port_mdb_add(mlxsw_sp_port,
@@ -1806,6 +1850,8 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev,
break;
}
+ mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
return err;
}
@@ -2222,6 +2268,8 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
+ if (!fdb_info->added_by_user)
+ break;
err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true);
if (err)
break;
@@ -2231,10 +2279,20 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
+ if (!fdb_info->added_by_user)
+ break;
mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false);
break;
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ /* These events are only used to potentially update an existing
+ * SPAN mirror.
+ */
+ break;
}
+ mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
out:
rtnl_unlock();
kfree(switchdev_work->fdb_info.addr);
@@ -2263,7 +2321,9 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
memcpy(&switchdev_work->fdb_info, ptr,
sizeof(switchdev_work->fdb_info));
switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
@@ -2295,6 +2355,12 @@ static struct notifier_block mlxsw_sp_switchdev_notifier = {
.notifier_call = mlxsw_sp_switchdev_event,
};
+u8
+mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port)
+{
+ return bridge_port->stp_state;
+}
+
static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
new file mode 100644
index 000000000000..bc44d5effc28
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/netdevice.h>
+
+struct mlxsw_sp_bridge;
+struct mlxsw_sp_bridge_port;
+
+struct mlxsw_sp_bridge_port *
+mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
+ struct net_device *brport_dev);
+
+u8 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 29b4e5f8c102..65f0791cae0c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1214,45 +1214,83 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return 0;
}
-static int
-wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
- enum br_mask br_mask, bool swap)
+static const struct jmp_code_map {
+ enum br_mask br_mask;
+ bool swap;
+} jmp_code_map[] = {
+ [BPF_JGT >> 4] = { BR_BLO, true },
+ [BPF_JGE >> 4] = { BR_BHS, false },
+ [BPF_JLT >> 4] = { BR_BLO, false },
+ [BPF_JLE >> 4] = { BR_BHS, true },
+ [BPF_JSGT >> 4] = { BR_BLT, true },
+ [BPF_JSGE >> 4] = { BR_BGE, false },
+ [BPF_JSLT >> 4] = { BR_BLT, false },
+ [BPF_JSLE >> 4] = { BR_BGE, true },
+};
+
+static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
+{
+ unsigned int op;
+
+ op = BPF_OP(meta->insn.code) >> 4;
+ /* br_mask of 0 is BR_BEQ which we don't use in jump code table */
+ if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
+ !jmp_code_map[op].br_mask,
+ "no code found for jump instruction"))
+ return NULL;
+
+ return &jmp_code_map[op];
+}
+
+static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 imm = insn->imm; /* sign extend */
+ const struct jmp_code_map *code;
+ enum alu_op alu_op, carry_op;
u8 reg = insn->dst_reg * 2;
swreg tmp_reg;
+ code = nfp_jmp_code_get(meta);
+ if (!code)
+ return -EINVAL;
+
+ alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
+ carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
+
tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
- if (!swap)
- emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+ if (!code->swap)
+ emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
else
- emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+ emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
- if (!swap)
+ if (!code->swap)
emit_alu(nfp_prog, reg_none(),
- reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+ reg_a(reg + 1), carry_op, tmp_reg);
else
emit_alu(nfp_prog, reg_none(),
- tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+ tmp_reg, carry_op, reg_a(reg + 1));
- emit_br(nfp_prog, br_mask, insn->off, 0);
+ emit_br(nfp_prog, code->br_mask, insn->off, 0);
return 0;
}
-static int
-wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
- enum br_mask br_mask, bool swap)
+static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
+ const struct jmp_code_map *code;
u8 areg, breg;
+ code = nfp_jmp_code_get(meta);
+ if (!code)
+ return -EINVAL;
+
areg = insn->dst_reg * 2;
breg = insn->src_reg * 2;
- if (swap) {
+ if (code->swap) {
areg ^= breg;
breg ^= areg;
areg ^= breg;
@@ -1261,7 +1299,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
emit_alu(nfp_prog, reg_none(),
reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
- emit_br(nfp_prog, br_mask, insn->off, 0);
+ emit_br(nfp_prog, code->br_mask, insn->off, 0);
return 0;
}
@@ -1400,7 +1438,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
if (!load_lm_ptr)
return 0;
- emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
+ emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
wrp_nops(nfp_prog, 3);
return 0;
@@ -2283,46 +2321,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
-static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
-}
-
static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
@@ -2392,46 +2390,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
-static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
-}
-
static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
@@ -2520,25 +2478,25 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
[BPF_JMP | BPF_JA | BPF_K] = jump,
[BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
- [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
- [BPF_JMP | BPF_JGE | BPF_K] = jge_imm,
- [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm,
- [BPF_JMP | BPF_JLE | BPF_K] = jle_imm,
- [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm,
- [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm,
- [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm,
- [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm,
+ [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
+ [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
[BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
[BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
[BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
- [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg,
- [BPF_JMP | BPF_JGE | BPF_X] = jge_reg,
- [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg,
- [BPF_JMP | BPF_JLE | BPF_X] = jle_reg,
- [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg,
- [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg,
- [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg,
- [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg,
+ [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
+ [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
[BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
[BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
[BPF_JMP | BPF_CALL] = call,
@@ -2777,6 +2735,54 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
}
}
+/* abs(insn.imm) will fit better into unrestricted reg immediate -
+ * convert add/sub of a negative number into a sub/add of a positive one.
+ */
+static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta;
+
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
+ struct bpf_insn insn = meta->insn;
+
+ if (meta->skip)
+ continue;
+
+ if (BPF_CLASS(insn.code) != BPF_ALU &&
+ BPF_CLASS(insn.code) != BPF_ALU64 &&
+ BPF_CLASS(insn.code) != BPF_JMP)
+ continue;
+ if (BPF_SRC(insn.code) != BPF_K)
+ continue;
+ if (insn.imm >= 0)
+ continue;
+
+ if (BPF_CLASS(insn.code) == BPF_JMP) {
+ switch (BPF_OP(insn.code)) {
+ case BPF_JGE:
+ case BPF_JSGE:
+ case BPF_JLT:
+ case BPF_JSLT:
+ meta->jump_neg_op = true;
+ break;
+ default:
+ continue;
+ }
+ } else {
+ if (BPF_OP(insn.code) == BPF_ADD)
+ insn.code = BPF_CLASS(insn.code) | BPF_SUB;
+ else if (BPF_OP(insn.code) == BPF_SUB)
+ insn.code = BPF_CLASS(insn.code) | BPF_ADD;
+ else
+ continue;
+
+ meta->insn.code = insn.code | BPF_K;
+ }
+
+ meta->insn.imm = -insn.imm;
+ }
+}
+
/* Remove masking after load since our load guarantees this is not needed */
static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
{
@@ -3212,6 +3218,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
nfp_bpf_opt_reg_init(nfp_prog);
+ nfp_bpf_opt_neg_add_sub(nfp_prog);
nfp_bpf_opt_ld_mask(nfp_prog);
nfp_bpf_opt_ld_shift(nfp_prog);
nfp_bpf_opt_ldst_gather(nfp_prog);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 4981c8944ca3..68b5d326483d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -236,6 +236,7 @@ struct nfp_bpf_reg_state {
* @xadd_over_16bit: 16bit immediate is not guaranteed
* @xadd_maybe_16bit: 16bit immediate is possible
* @jmp_dst: destination info for jump instructions
+ * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB
* @func_id: function id for call instructions
* @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions
@@ -264,7 +265,10 @@ struct nfp_insn_meta {
bool xadd_maybe_16bit;
};
/* jump */
- struct nfp_insn_meta *jmp_dst;
+ struct {
+ struct nfp_insn_meta *jmp_dst;
+ bool jump_neg_op;
+ };
/* function calls */
struct {
u32 func_id;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c67e1b54c614..733ff53cc601 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -47,6 +47,7 @@
struct net_device;
struct nfp_app;
+#define NFP_FL_STATS_CTX_DONT_CARE cpu_to_be32(0xffffffff)
#define NFP_FL_STATS_ENTRY_RS BIT(20)
#define NFP_FL_STATS_ELEM_RS 4
#define NFP_FL_REPEATED_HASH_MAX BIT(17)
@@ -189,9 +190,11 @@ struct nfp_fl_payload {
spinlock_t lock; /* lock stats */
struct nfp_fl_stats stats;
__be32 nfp_tun_ipv4_addr;
+ struct net_device *ingress_dev;
char *unmasked_data;
char *mask_data;
char *action_data;
+ bool ingress_offload;
};
struct nfp_fl_stats_frame {
@@ -216,12 +219,14 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
struct nfp_fl_payload *nfp_flow);
int nfp_compile_flow_metadata(struct nfp_app *app,
struct tc_cls_flower_offload *flow,
- struct nfp_fl_payload *nfp_flow);
+ struct nfp_fl_payload *nfp_flow,
+ struct net_device *netdev);
int nfp_modify_flow_metadata(struct nfp_app *app,
struct nfp_fl_payload *nfp_flow);
struct nfp_fl_payload *
-nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
+ struct net_device *netdev, __be32 host_ctx);
struct nfp_fl_payload *
nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index db977cf8e933..21668aa435e8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -99,14 +99,18 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
/* Must be called with either RTNL or rcu_read_lock */
struct nfp_fl_payload *
-nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie)
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
+ struct net_device *netdev, __be32 host_ctx)
{
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_payload *flower_entry;
hash_for_each_possible_rcu(priv->flow_table, flower_entry, link,
tc_flower_cookie)
- if (flower_entry->tc_flower_cookie == tc_flower_cookie)
+ if (flower_entry->tc_flower_cookie == tc_flower_cookie &&
+ (!netdev || flower_entry->ingress_dev == netdev) &&
+ (host_ctx == NFP_FL_STATS_CTX_DONT_CARE ||
+ flower_entry->meta.host_ctx_id == host_ctx))
return flower_entry;
return NULL;
@@ -121,13 +125,11 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
flower_cookie = be64_to_cpu(stats->stats_cookie);
rcu_read_lock();
- nfp_flow = nfp_flower_search_fl_table(app, flower_cookie);
+ nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL,
+ stats->stats_con_id);
if (!nfp_flow)
goto exit_rcu_unlock;
- if (nfp_flow->meta.host_ctx_id != stats->stats_con_id)
- goto exit_rcu_unlock;
-
spin_lock(&nfp_flow->lock);
nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count);
nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count);
@@ -317,7 +319,8 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
int nfp_compile_flow_metadata(struct nfp_app *app,
struct tc_cls_flower_offload *flow,
- struct nfp_fl_payload *nfp_flow)
+ struct nfp_fl_payload *nfp_flow,
+ struct net_device *netdev)
{
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_payload *check_entry;
@@ -348,7 +351,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
nfp_flow->stats.bytes = 0;
nfp_flow->stats.used = jiffies;
- check_entry = nfp_flower_search_fl_table(app, flow->cookie);
+ check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev,
+ NFP_FL_STATS_CTX_DONT_CARE);
if (check_entry) {
if (nfp_release_stats_entry(app, stats_cxt))
return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 114d2ab02a38..70ec9d821b91 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -345,7 +345,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
}
static struct nfp_fl_payload *
-nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress)
{
struct nfp_fl_payload *flow_pay;
@@ -371,6 +371,8 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
flow_pay->meta.flags = 0;
spin_lock_init(&flow_pay->lock);
+ flow_pay->ingress_offload = !egress;
+
return flow_pay;
err_free_mask:
@@ -402,8 +404,20 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_payload *flow_pay;
struct nfp_fl_key_ls *key_layer;
+ struct net_device *ingr_dev;
int err;
+ ingr_dev = egress ? NULL : netdev;
+ flow_pay = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+ NFP_FL_STATS_CTX_DONT_CARE);
+ if (flow_pay) {
+ /* Ignore as duplicate if it has been added by different cb. */
+ if (flow_pay->ingress_offload && egress)
+ return 0;
+ else
+ return -EOPNOTSUPP;
+ }
+
key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
if (!key_layer)
return -ENOMEM;
@@ -413,12 +427,14 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
if (err)
goto err_free_key_ls;
- flow_pay = nfp_flower_allocate_new(key_layer);
+ flow_pay = nfp_flower_allocate_new(key_layer, egress);
if (!flow_pay) {
err = -ENOMEM;
goto err_free_key_ls;
}
+ flow_pay->ingress_dev = egress ? NULL : netdev;
+
err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay,
tun_type);
if (err)
@@ -428,7 +444,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
if (err)
goto err_destroy_flow;
- err = nfp_compile_flow_metadata(app, flow, flow_pay);
+ err = nfp_compile_flow_metadata(app, flow, flow_pay,
+ flow_pay->ingress_dev);
if (err)
goto err_destroy_flow;
@@ -462,6 +479,7 @@ err_free_key_ls:
* @app: Pointer to the APP handle
* @netdev: netdev structure.
* @flow: TC flower classifier offload structure
+ * @egress: Netdev is the egress dev.
*
* Removes a flow from the repeated hash structure and clears the
* action payload.
@@ -470,15 +488,18 @@ err_free_key_ls:
*/
static int
nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
- struct tc_cls_flower_offload *flow)
+ struct tc_cls_flower_offload *flow, bool egress)
{
struct nfp_port *port = nfp_port_from_netdev(netdev);
struct nfp_fl_payload *nfp_flow;
+ struct net_device *ingr_dev;
int err;
- nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+ ingr_dev = egress ? NULL : netdev;
+ nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+ NFP_FL_STATS_CTX_DONT_CARE);
if (!nfp_flow)
- return -ENOENT;
+ return egress ? 0 : -ENOENT;
err = nfp_modify_flow_metadata(app, nfp_flow);
if (err)
@@ -505,7 +526,9 @@ err_free_flow:
/**
* nfp_flower_get_stats() - Populates flow stats obtained from hardware.
* @app: Pointer to the APP handle
+ * @netdev: Netdev structure.
* @flow: TC flower classifier offload structure
+ * @egress: Netdev is the egress dev.
*
* Populates a flow statistics structure which which corresponds to a
* specific flow.
@@ -513,14 +536,21 @@ err_free_flow:
* Return: negative value on error, 0 if stats populated successfully.
*/
static int
-nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow)
+nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
+ struct tc_cls_flower_offload *flow, bool egress)
{
struct nfp_fl_payload *nfp_flow;
+ struct net_device *ingr_dev;
- nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+ ingr_dev = egress ? NULL : netdev;
+ nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+ NFP_FL_STATS_CTX_DONT_CARE);
if (!nfp_flow)
return -EINVAL;
+ if (nfp_flow->ingress_offload && egress)
+ return 0;
+
spin_lock_bh(&nfp_flow->lock);
tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes,
nfp_flow->stats.pkts, nfp_flow->stats.used);
@@ -543,9 +573,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
case TC_CLSFLOWER_REPLACE:
return nfp_flower_add_offload(app, netdev, flower, egress);
case TC_CLSFLOWER_DESTROY:
- return nfp_flower_del_offload(app, netdev, flower);
+ return nfp_flower_del_offload(app, netdev, flower, egress);
case TC_CLSFLOWER_STATS:
- return nfp_flower_get_stats(app, flower);
+ return nfp_flower_get_stats(app, netdev, flower, egress);
}
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index c4b1f344b4da..0ade122805ad 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -486,6 +486,10 @@ static int nfp_pci_probe(struct pci_dev *pdev,
goto err_disable_msix;
}
+ err = nfp_resource_table_init(pf->cpp);
+ if (err)
+ goto err_cpp_free;
+
pf->hwinfo = nfp_hwinfo_read(pf->cpp);
dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n",
@@ -548,6 +552,7 @@ err_fw_unload:
vfree(pf->dumpspec);
err_hwinfo_free:
kfree(pf->hwinfo);
+err_cpp_free:
nfp_cpp_free(pf->cpp);
err_disable_msix:
destroy_workqueue(pf->wq);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1eb6549f2a54..d9111c077699 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
act = bpf_prog_run_xdp(xdp_prog, &xdp);
- pkt_len -= xdp.data - orig_data;
+ pkt_len = xdp.data_end - xdp.data;
pkt_off += xdp.data - orig_data;
switch (act) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
index ced62d112aa2..f44d0a857314 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
@@ -94,6 +94,8 @@ int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask,
/* MAC Statistics Accumulator */
#define NFP_RESOURCE_MAC_STATISTICS "mac.stat"
+int nfp_resource_table_init(struct nfp_cpp *cpp);
+
struct nfp_resource *
nfp_resource_acquire(struct nfp_cpp *cpp, const char *name);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index cd678323bacb..a0e336bd1d85 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -1330,6 +1330,7 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev)
/* Finished with card initialization. */
dev_info(&pdev->dev,
"Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n");
+ pcie_print_link_status(pdev);
nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
if (!nfp) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index c8f2c064cce3..4e19add1c539 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -295,6 +295,8 @@ void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex);
int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex);
int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex);
int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex);
+int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target,
+ unsigned long long address);
/**
* nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
index cb28ac03e4ca..c88bf673cb76 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
@@ -59,6 +59,11 @@ static u32 nfp_mutex_unlocked(u16 interface)
return (u32)interface << 16 | 0x0000;
}
+static u32 nfp_mutex_owner(u32 val)
+{
+ return val >> 16;
+}
+
static bool nfp_mutex_is_locked(u32 val)
{
return (val & 0xffff) == 0x000f;
@@ -351,3 +356,43 @@ int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex)
return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL;
}
+
+/**
+ * nfp_cpp_mutex_reclaim() - Unlock mutex if held by local endpoint
+ * @cpp: NFP CPP handle
+ * @target: NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
+ * @address: Offset into the address space of the NFP CPP target ID
+ *
+ * Release lock if held by local system. Extreme care is advised, call only
+ * when no local lock users can exist.
+ *
+ * Return: 0 if the lock was OK, 1 if locked by us, -errno on invalid mutex
+ */
+int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target,
+ unsigned long long address)
+{
+ const u32 mur = NFP_CPP_ID(target, 3, 0); /* atomic_read */
+ const u32 muw = NFP_CPP_ID(target, 4, 0); /* atomic_write */
+ u16 interface = nfp_cpp_interface(cpp);
+ int err;
+ u32 tmp;
+
+ err = nfp_cpp_mutex_validate(interface, &target, address);
+ if (err)
+ return err;
+
+ /* Check lock */
+ err = nfp_cpp_readl(cpp, mur, address, &tmp);
+ if (err < 0)
+ return err;
+
+ if (nfp_mutex_is_unlocked(tmp) || nfp_mutex_owner(tmp) != interface)
+ return 0;
+
+ /* Bust the lock */
+ err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_unlocked(interface));
+ if (err < 0)
+ return err;
+
+ return 1;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
index 7e14725055c7..2dd89dba9311 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
@@ -338,3 +338,62 @@ u64 nfp_resource_size(struct nfp_resource *res)
{
return res->size;
}
+
+/**
+ * nfp_resource_table_init() - Run initial checks on the resource table
+ * @cpp: NFP CPP handle
+ *
+ * Start-of-day init procedure for resource table. Must be called before
+ * any local resource table users may exist.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int nfp_resource_table_init(struct nfp_cpp *cpp)
+{
+ struct nfp_cpp_mutex *dev_mutex;
+ int i, err;
+
+ err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET,
+ NFP_RESOURCE_TBL_BASE);
+ if (err < 0) {
+ nfp_err(cpp, "Error: failed to reclaim resource table mutex\n");
+ return err;
+ }
+ if (err)
+ nfp_warn(cpp, "Warning: busted main resource table mutex\n");
+
+ dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET,
+ NFP_RESOURCE_TBL_BASE,
+ NFP_RESOURCE_TBL_KEY);
+ if (!dev_mutex)
+ return -ENOMEM;
+
+ if (nfp_cpp_mutex_lock(dev_mutex)) {
+ nfp_err(cpp, "Error: failed to claim resource table mutex\n");
+ nfp_cpp_mutex_free(dev_mutex);
+ return -EINVAL;
+ }
+
+ /* Resource 0 is the dev_mutex, start from 1 */
+ for (i = 1; i < NFP_RESOURCE_TBL_ENTRIES; i++) {
+ u64 addr = NFP_RESOURCE_TBL_BASE +
+ sizeof(struct nfp_resource_entry) * i;
+
+ err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, addr);
+ if (err < 0) {
+ nfp_err(cpp,
+ "Error: failed to reclaim resource %d mutex\n",
+ i);
+ goto err_unlock;
+ }
+ if (err)
+ nfp_warn(cpp, "Warning: busted resource %d mutex\n", i);
+ }
+
+ err = 0;
+err_unlock:
+ nfp_cpp_mutex_unlock(dev_mutex);
+ nfp_cpp_mutex_free(dev_mutex);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 4926c5532fba..39124b594a36 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -419,6 +419,7 @@ struct phy_defs {
#define NUM_RSS_MEM_TYPES 5
#define NUM_BIG_RAM_TYPES 3
+#define BIG_RAM_NAME_LEN 3
#define NUM_PHY_TBUS_ADDRESSES 2048
#define PHY_DUMP_SIZE_DWORDS (NUM_PHY_TBUS_ADDRESSES / 2)
@@ -3650,8 +3651,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
BIT(big_ram->is_256b_bit_offset[dev_data->chip_id]) ? 256
: 128;
- strscpy(type_name, big_ram->instance_name, sizeof(type_name));
- strscpy(mem_name, big_ram->instance_name, sizeof(mem_name));
+ strncpy(type_name, big_ram->instance_name, BIG_RAM_NAME_LEN);
+ strncpy(mem_name, big_ram->instance_name, BIG_RAM_NAME_LEN);
/* Dump memory header */
offset += qed_grc_dump_mem_hdr(p_hwfn,
@@ -7778,6 +7779,57 @@ int qed_dbg_igu_fifo_size(struct qed_dev *cdev)
return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO);
}
+int qed_dbg_nvm_image_length(struct qed_hwfn *p_hwfn,
+ enum qed_nvm_images image_id, u32 *length)
+{
+ struct qed_nvm_image_att image_att;
+ int rc;
+
+ *length = 0;
+ rc = qed_mcp_get_nvm_image_att(p_hwfn, image_id, &image_att);
+ if (rc)
+ return rc;
+
+ *length = image_att.length;
+
+ return rc;
+}
+
+int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
+ u32 *num_dumped_bytes, enum qed_nvm_images image_id)
+{
+ struct qed_hwfn *p_hwfn =
+ &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ u32 len_rounded, i;
+ __be32 val;
+ int rc;
+
+ *num_dumped_bytes = 0;
+ rc = qed_dbg_nvm_image_length(p_hwfn, image_id, &len_rounded);
+ if (rc)
+ return rc;
+
+ DP_NOTICE(p_hwfn->cdev,
+ "Collecting a debug feature [\"nvram image %d\"]\n",
+ image_id);
+
+ len_rounded = roundup(len_rounded, sizeof(u32));
+ rc = qed_mcp_get_nvm_image(p_hwfn, image_id, buffer, len_rounded);
+ if (rc)
+ return rc;
+
+ /* QED_NVM_IMAGE_NVM_META image is not swapped like other images */
+ if (image_id != QED_NVM_IMAGE_NVM_META)
+ for (i = 0; i < len_rounded; i += 4) {
+ val = cpu_to_be32(*(u32 *)(buffer + i));
+ *(u32 *)(buffer + i) = val;
+ }
+
+ *num_dumped_bytes = len_rounded;
+
+ return rc;
+}
+
int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer,
u32 *num_dumped_bytes)
{
@@ -7831,6 +7883,9 @@ enum debug_print_features {
IGU_FIFO = 6,
PHY = 7,
FW_ASSERTS = 8,
+ NVM_CFG1 = 9,
+ DEFAULT_CFG = 10,
+ NVM_META = 11,
};
static u32 qed_calc_regdump_header(enum debug_print_features feature,
@@ -7965,13 +8020,61 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc);
}
+ /* nvm cfg1 */
+ rc = qed_dbg_nvm_image(cdev,
+ (u8 *)buffer + offset + REGDUMP_HEADER_SIZE,
+ &feature_size, QED_NVM_IMAGE_NVM_CFG1);
+ if (!rc) {
+ *(u32 *)((u8 *)buffer + offset) =
+ qed_calc_regdump_header(NVM_CFG1, cur_engine,
+ feature_size, omit_engine);
+ offset += (feature_size + REGDUMP_HEADER_SIZE);
+ } else if (rc != -ENOENT) {
+ DP_ERR(cdev,
+ "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n",
+ QED_NVM_IMAGE_NVM_CFG1, "QED_NVM_IMAGE_NVM_CFG1", rc);
+ }
+
+ /* nvm default */
+ rc = qed_dbg_nvm_image(cdev,
+ (u8 *)buffer + offset + REGDUMP_HEADER_SIZE,
+ &feature_size, QED_NVM_IMAGE_DEFAULT_CFG);
+ if (!rc) {
+ *(u32 *)((u8 *)buffer + offset) =
+ qed_calc_regdump_header(DEFAULT_CFG, cur_engine,
+ feature_size, omit_engine);
+ offset += (feature_size + REGDUMP_HEADER_SIZE);
+ } else if (rc != -ENOENT) {
+ DP_ERR(cdev,
+ "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n",
+ QED_NVM_IMAGE_DEFAULT_CFG, "QED_NVM_IMAGE_DEFAULT_CFG",
+ rc);
+ }
+
+ /* nvm meta */
+ rc = qed_dbg_nvm_image(cdev,
+ (u8 *)buffer + offset + REGDUMP_HEADER_SIZE,
+ &feature_size, QED_NVM_IMAGE_NVM_META);
+ if (!rc) {
+ *(u32 *)((u8 *)buffer + offset) =
+ qed_calc_regdump_header(NVM_META, cur_engine,
+ feature_size, omit_engine);
+ offset += (feature_size + REGDUMP_HEADER_SIZE);
+ } else if (rc != -ENOENT) {
+ DP_ERR(cdev,
+ "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n",
+ QED_NVM_IMAGE_NVM_META, "QED_NVM_IMAGE_NVM_META", rc);
+ }
+
return 0;
}
int qed_dbg_all_data_size(struct qed_dev *cdev)
{
+ struct qed_hwfn *p_hwfn =
+ &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+ u32 regs_len = 0, image_len = 0;
u8 cur_engine, org_engine;
- u32 regs_len = 0;
org_engine = qed_get_debug_engine(cdev);
for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) {
@@ -7993,6 +8096,15 @@ int qed_dbg_all_data_size(struct qed_dev *cdev)
/* Engine common */
regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev);
+ qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_CFG1, &image_len);
+ if (image_len)
+ regs_len += REGDUMP_HEADER_SIZE + image_len;
+ qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_DEFAULT_CFG, &image_len);
+ if (image_len)
+ regs_len += REGDUMP_HEADER_SIZE + image_len;
+ qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_META, &image_len);
+ if (image_len)
+ regs_len += REGDUMP_HEADER_SIZE + image_len;
return regs_len;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e874504e8b28..8b1b7e8ca56c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2850,6 +2850,24 @@ static int qed_fp_cqe_completion(struct qed_dev *dev,
cqe);
}
+static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac)
+{
+ int i, ret;
+
+ if (IS_PF(cdev))
+ return 0;
+
+ for_each_hwfn(cdev, i) {
+ struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+
+ ret = qed_vf_pf_bulletin_update_mac(p_hwfn, mac);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_QED_SRIOV
extern const struct qed_iov_hv_ops qed_iov_ops_pass;
#endif
@@ -2887,6 +2905,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
.ntuple_filter_config = &qed_ntuple_arfs_filter_config,
.configure_arfs_searcher = &qed_configure_arfs_searcher,
.get_coalesce = &qed_get_coalesce,
+ .req_bulletin_update_mac = &qed_req_bulletin_update_mac,
};
const struct qed_eth_ops *qed_get_eth_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 9854aa9139af..d1d3787affe8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1894,15 +1894,8 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
u8 *buf, u16 len)
{
struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
- struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
- int rc;
-
- if (!ptt)
- return -EAGAIN;
- rc = qed_mcp_get_nvm_image(hwfn, ptt, type, buf, len);
- qed_ptt_release(hwfn, ptt);
- return rc;
+ return qed_mcp_get_nvm_image(hwfn, type, buf, len);
}
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index ec0d425766a7..0550f0ee11b3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2529,9 +2529,8 @@ err0:
return rc;
}
-static int
+int
qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
enum qed_nvm_images image_id,
struct qed_nvm_image_att *p_image_att)
{
@@ -2546,6 +2545,15 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
case QED_NVM_IMAGE_FCOE_CFG:
type = NVM_TYPE_FCOE_CFG;
break;
+ case QED_NVM_IMAGE_NVM_CFG1:
+ type = NVM_TYPE_NVM_CFG1;
+ break;
+ case QED_NVM_IMAGE_DEFAULT_CFG:
+ type = NVM_TYPE_DEFAULT_CFG;
+ break;
+ case QED_NVM_IMAGE_NVM_META:
+ type = NVM_TYPE_META;
+ break;
default:
DP_NOTICE(p_hwfn, "Unknown request of image_id %08x\n",
image_id);
@@ -2569,7 +2577,6 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
}
int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
enum qed_nvm_images image_id,
u8 *p_buffer, u32 buffer_len)
{
@@ -2578,7 +2585,7 @@ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
memset(p_buffer, 0, buffer_len);
- rc = qed_mcp_get_nvm_image_att(p_hwfn, p_ptt, image_id, &image_att);
+ rc = qed_mcp_get_nvm_image_att(p_hwfn, image_id, &image_att);
if (rc)
return rc;
@@ -2590,9 +2597,6 @@ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
- /* Each NVM image is suffixed by CRC; Upper-layer has no need for it */
- image_att.length -= 4;
-
if (image_att.length > buffer_len) {
DP_VERBOSE(p_hwfn,
QED_MSG_STORAGE,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 8a5c988d0c3c..3af3896420b9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -486,7 +486,20 @@ struct qed_nvm_image_att {
* @brief Allows reading a whole nvram image
*
* @param p_hwfn
- * @param p_ptt
+ * @param image_id - image to get attributes for
+ * @param p_image_att - image attributes structure into which to fill data
+ *
+ * @return int - 0 - operation was successful.
+ */
+int
+qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
+ enum qed_nvm_images image_id,
+ struct qed_nvm_image_att *p_image_att);
+
+/**
+ * @brief Allows reading a whole nvram image
+ *
+ * @param p_hwfn
* @param image_id - image requested for reading
* @param p_buffer - allocated buffer into which to fill data
* @param buffer_len - length of the allocated buffer.
@@ -494,7 +507,6 @@ struct qed_nvm_image_att {
* @return 0 iff p_buffer now contains the nvram image.
*/
int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
enum qed_nvm_images image_id,
u8 *p_buffer, u32 buffer_len);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 5acb91b3564c..f01bf52bc381 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -48,7 +48,7 @@ static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
u8 opcode,
__le16 echo,
union event_ring_data *data, u8 fw_return_code);
-
+static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid);
static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf)
{
@@ -1790,7 +1790,8 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
if (!p_vf->vport_instance)
return -EINVAL;
- if (events & BIT(MAC_ADDR_FORCED)) {
+ if ((events & BIT(MAC_ADDR_FORCED)) ||
+ p_vf->p_vf_info.is_trusted_configured) {
/* Since there's no way [currently] of removing the MAC,
* we can always assume this means we need to force it.
*/
@@ -1809,8 +1810,12 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
"PF failed to configure MAC for VF\n");
return rc;
}
-
- p_vf->configured_features |= 1 << MAC_ADDR_FORCED;
+ if (p_vf->p_vf_info.is_trusted_configured)
+ p_vf->configured_features |=
+ BIT(VFPF_BULLETIN_MAC_ADDR);
+ else
+ p_vf->configured_features |=
+ BIT(MAC_ADDR_FORCED);
}
if (events & BIT(VLAN_ADDR_FORCED)) {
@@ -3170,6 +3175,10 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn,
if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))
return 0;
+ /* Don't keep track of shadow copy since we don't intend to restore. */
+ if (p_vf->p_vf_info.is_trusted_configured)
+ return 0;
+
/* First remove entries and then add new ones */
if (p_params->opcode == QED_FILTER_REMOVE) {
for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
@@ -3244,9 +3253,17 @@ static int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
/* No real decision to make; Store the configured MAC */
if (params->type == QED_FILTER_MAC ||
- params->type == QED_FILTER_MAC_VLAN)
+ params->type == QED_FILTER_MAC_VLAN) {
ether_addr_copy(vf->mac, params->mac);
+ if (vf->is_trusted_configured) {
+ qed_iov_bulletin_set_mac(hwfn, vf->mac, vfid);
+
+ /* Update and post bulleitin again */
+ qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+ }
+ }
+
return 0;
}
@@ -3803,6 +3820,40 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
__qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
}
+static int
+qed_iov_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_vf_info *p_vf)
+{
+ struct qed_bulletin_content *p_bulletin = p_vf->bulletin.p_virt;
+ struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
+ struct vfpf_bulletin_update_mac_tlv *p_req;
+ u8 status = PFVF_STATUS_SUCCESS;
+ int rc = 0;
+
+ if (!p_vf->p_vf_info.is_trusted_configured) {
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_IOV,
+ "Blocking bulletin update request from untrusted VF[%d]\n",
+ p_vf->abs_vf_id);
+ status = PFVF_STATUS_NOT_SUPPORTED;
+ rc = -EINVAL;
+ goto send_status;
+ }
+
+ p_req = &mbx->req_virt->bulletin_update_mac;
+ ether_addr_copy(p_bulletin->mac, p_req->mac);
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "Updated bulletin of VF[%d] with requested MAC[%pM]\n",
+ p_vf->abs_vf_id, p_req->mac);
+
+send_status:
+ qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf,
+ CHANNEL_TLV_BULLETIN_UPDATE_MAC,
+ sizeof(struct pfvf_def_resp_tlv), status);
+ return rc;
+}
+
static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, int vfid)
{
@@ -3882,6 +3933,9 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
case CHANNEL_TLV_COALESCE_READ:
qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf);
break;
+ case CHANNEL_TLV_BULLETIN_UPDATE_MAC:
+ qed_iov_vf_pf_bulletin_update_mac(p_hwfn, p_ptt, p_vf);
+ break;
}
} else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
DP_VERBOSE(p_hwfn, QED_MSG_IOV,
@@ -4081,16 +4135,60 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn,
return;
}
- feature = 1 << MAC_ADDR_FORCED;
+ if (vf_info->p_vf_info.is_trusted_configured) {
+ feature = BIT(VFPF_BULLETIN_MAC_ADDR);
+ /* Trust mode will disable Forced MAC */
+ vf_info->bulletin.p_virt->valid_bitmap &=
+ ~BIT(MAC_ADDR_FORCED);
+ } else {
+ feature = BIT(MAC_ADDR_FORCED);
+ /* Forced MAC will disable MAC_ADDR */
+ vf_info->bulletin.p_virt->valid_bitmap &=
+ ~BIT(VFPF_BULLETIN_MAC_ADDR);
+ }
+
memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN);
vf_info->bulletin.p_virt->valid_bitmap |= feature;
- /* Forced MAC will disable MAC_ADDR */
- vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR);
qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
}
+static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid)
+{
+ struct qed_vf_info *vf_info;
+ u64 feature;
+
+ vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
+ if (!vf_info) {
+ DP_NOTICE(p_hwfn->cdev, "Can not set MAC, invalid vfid [%d]\n",
+ vfid);
+ return -EINVAL;
+ }
+
+ if (vf_info->b_malicious) {
+ DP_NOTICE(p_hwfn->cdev, "Can't set MAC to malicious VF [%d]\n",
+ vfid);
+ return -EINVAL;
+ }
+
+ if (vf_info->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) {
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "Can not set MAC, Forced MAC is configured\n");
+ return -EINVAL;
+ }
+
+ feature = BIT(VFPF_BULLETIN_MAC_ADDR);
+ ether_addr_copy(vf_info->bulletin.p_virt->mac, mac);
+
+ vf_info->bulletin.p_virt->valid_bitmap |= feature;
+
+ if (vf_info->p_vf_info.is_trusted_configured)
+ qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
+
+ return 0;
+}
+
static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
u16 pvid, int vfid)
{
@@ -4204,6 +4302,21 @@ out:
return rc;
}
+static u8 *qed_iov_bulletin_get_mac(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
+{
+ struct qed_vf_info *p_vf;
+
+ p_vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, true);
+ if (!p_vf || !p_vf->bulletin.p_virt)
+ return NULL;
+
+ if (!(p_vf->bulletin.p_virt->valid_bitmap &
+ BIT(VFPF_BULLETIN_MAC_ADDR)))
+ return NULL;
+
+ return p_vf->bulletin.p_virt->mac;
+}
+
static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn,
u16 rel_vf_id)
{
@@ -4493,8 +4606,12 @@ static int qed_sriov_pf_set_mac(struct qed_dev *cdev, u8 *mac, int vfid)
if (!vf_info)
continue;
- /* Set the forced MAC, and schedule the IOV task */
- ether_addr_copy(vf_info->forced_mac, mac);
+ /* Set the MAC, and schedule the IOV task */
+ if (vf_info->is_trusted_configured)
+ ether_addr_copy(vf_info->mac, mac);
+ else
+ ether_addr_copy(vf_info->forced_mac, mac);
+
qed_schedule_iov(hwfn, QED_IOV_WQ_SET_UNICAST_FILTER_FLAG);
}
@@ -4802,6 +4919,33 @@ static void qed_handle_vf_msg(struct qed_hwfn *hwfn)
qed_ptt_release(hwfn, ptt);
}
+static bool qed_pf_validate_req_vf_mac(struct qed_hwfn *hwfn,
+ u8 *mac,
+ struct qed_public_vf_info *info)
+{
+ if (info->is_trusted_configured) {
+ if (is_valid_ether_addr(info->mac) &&
+ (!mac || !ether_addr_equal(mac, info->mac)))
+ return true;
+ } else {
+ if (is_valid_ether_addr(info->forced_mac) &&
+ (!mac || !ether_addr_equal(mac, info->forced_mac)))
+ return true;
+ }
+
+ return false;
+}
+
+static void qed_set_bulletin_mac(struct qed_hwfn *hwfn,
+ struct qed_public_vf_info *info,
+ int vfid)
+{
+ if (info->is_trusted_configured)
+ qed_iov_bulletin_set_mac(hwfn, info->mac, vfid);
+ else
+ qed_iov_bulletin_set_forced_mac(hwfn, info->forced_mac, vfid);
+}
+
static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn)
{
int i;
@@ -4816,18 +4960,20 @@ static void qed_handle_pf_set_vf_unicast(struct qed_hwfn *hwfn)
continue;
/* Update data on bulletin board */
- mac = qed_iov_bulletin_get_forced_mac(hwfn, i);
- if (is_valid_ether_addr(info->forced_mac) &&
- (!mac || !ether_addr_equal(mac, info->forced_mac))) {
+ if (info->is_trusted_configured)
+ mac = qed_iov_bulletin_get_mac(hwfn, i);
+ else
+ mac = qed_iov_bulletin_get_forced_mac(hwfn, i);
+
+ if (qed_pf_validate_req_vf_mac(hwfn, mac, info)) {
DP_VERBOSE(hwfn,
QED_MSG_IOV,
"Handling PF setting of VF MAC to VF 0x%02x [Abs 0x%02x]\n",
i,
hwfn->cdev->p_iov_info->first_vf_in_pf + i);
- /* Update bulletin board with forced MAC */
- qed_iov_bulletin_set_forced_mac(hwfn,
- info->forced_mac, i);
+ /* Update bulletin board with MAC */
+ qed_set_bulletin_mac(hwfn, info, i);
update = true;
}
@@ -4867,6 +5013,72 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn)
qed_ptt_release(hwfn, ptt);
}
+static void qed_update_mac_for_vf_trust_change(struct qed_hwfn *hwfn, int vf_id)
+{
+ struct qed_public_vf_info *vf_info;
+ struct qed_vf_info *vf;
+ u8 *force_mac;
+ int i;
+
+ vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true);
+ vf = qed_iov_get_vf_info(hwfn, vf_id, true);
+
+ if (!vf_info || !vf)
+ return;
+
+ /* Force MAC converted to generic MAC in case of VF trust on */
+ if (vf_info->is_trusted_configured &&
+ (vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))) {
+ force_mac = qed_iov_bulletin_get_forced_mac(hwfn, vf_id);
+
+ if (force_mac) {
+ /* Clear existing shadow copy of MAC to have a clean
+ * slate.
+ */
+ for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
+ if (ether_addr_equal(vf->shadow_config.macs[i],
+ vf_info->mac)) {
+ memset(vf->shadow_config.macs[i], 0,
+ ETH_ALEN);
+ DP_VERBOSE(hwfn, QED_MSG_IOV,
+ "Shadow MAC %pM removed for VF 0x%02x, VF trust mode is ON\n",
+ vf_info->mac, vf_id);
+ break;
+ }
+ }
+
+ ether_addr_copy(vf_info->mac, force_mac);
+ memset(vf_info->forced_mac, 0, ETH_ALEN);
+ vf->bulletin.p_virt->valid_bitmap &=
+ ~BIT(MAC_ADDR_FORCED);
+ qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+ }
+ }
+
+ /* Update shadow copy with VF MAC when trust mode is turned off */
+ if (!vf_info->is_trusted_configured) {
+ u8 empty_mac[ETH_ALEN];
+
+ memset(empty_mac, 0, ETH_ALEN);
+ for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
+ if (ether_addr_equal(vf->shadow_config.macs[i],
+ empty_mac)) {
+ ether_addr_copy(vf->shadow_config.macs[i],
+ vf_info->mac);
+ DP_VERBOSE(hwfn, QED_MSG_IOV,
+ "Shadow is updated with %pM for VF 0x%02x, VF trust mode is OFF\n",
+ vf_info->mac, vf_id);
+ break;
+ }
+ }
+ /* Clear bulletin when trust mode is turned off,
+ * to have a clean slate for next (normal) operations.
+ */
+ qed_iov_bulletin_set_mac(hwfn, empty_mac, vf_id);
+ qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+ }
+}
+
static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
{
struct qed_sp_vport_update_params params;
@@ -4890,6 +5102,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
continue;
vf_info->is_trusted_configured = vf_info->is_trusted_request;
+ /* Handle forced MAC mode */
+ qed_update_mac_for_vf_trust_change(hwfn, i);
+
/* Validate that the VF has a configured vport */
vf = qed_iov_get_vf_info(hwfn, i, true);
if (!vf->vport_instance)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 91b5e9f02a62..2d7fcd6a0777 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1375,6 +1375,35 @@ exit:
}
int
+qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+ u8 *p_mac)
+{
+ struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+ struct vfpf_bulletin_update_mac_tlv *p_req;
+ struct pfvf_def_resp_tlv *p_resp;
+ int rc;
+
+ if (!p_mac)
+ return -EINVAL;
+
+ /* clear mailbox and prep header tlv */
+ p_req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_BULLETIN_UPDATE_MAC,
+ sizeof(*p_req));
+ ether_addr_copy(p_req->mac, p_mac);
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "Requesting bulletin update for MAC[%pM]\n", p_mac);
+
+ /* add list termination tlv */
+ qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+
+ p_resp = &p_iov->pf2vf_reply->default_resp;
+ rc = qed_send_msg2pf(p_hwfn, &p_resp->hdr.status, sizeof(*p_resp));
+ qed_vf_pf_req_end(p_hwfn, rc);
+ return rc;
+}
+
+int
qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid)
{
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 97d44dfb38ca..4f05d5eb3cf5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -518,6 +518,12 @@ struct pfvf_read_coal_resp_tlv {
u8 padding[6];
};
+struct vfpf_bulletin_update_mac_tlv {
+ struct vfpf_first_tlv first_tlv;
+ u8 mac[ETH_ALEN];
+ u8 padding[2];
+};
+
union vfpf_tlvs {
struct vfpf_first_tlv first_tlv;
struct vfpf_acquire_tlv acquire;
@@ -532,6 +538,7 @@ union vfpf_tlvs {
struct vfpf_update_tunn_param_tlv tunn_param_update;
struct vfpf_update_coalesce update_coalesce;
struct vfpf_read_coal_req_tlv read_coal_req;
+ struct vfpf_bulletin_update_mac_tlv bulletin_update_mac;
struct tlv_buffer_size tlv_buf_size;
};
@@ -650,6 +657,7 @@ enum {
CHANNEL_TLV_COALESCE_UPDATE,
CHANNEL_TLV_QID,
CHANNEL_TLV_COALESCE_READ,
+ CHANNEL_TLV_BULLETIN_UPDATE_MAC,
CHANNEL_TLV_MAX,
/* Required for iterating over vport-update tlvs.
@@ -1042,6 +1050,13 @@ int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
struct qed_tunnel_info *p_tunn);
u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id);
+/**
+ * @brief - Ask PF to update the MAC address in it's bulletin board
+ *
+ * @param p_mac - mac address to be updated in bulletin board
+ */
+int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac);
+
#else
static inline void qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
struct qed_mcp_link_params *params)
@@ -1228,6 +1243,12 @@ static inline int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
+static inline int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+ u8 *p_mac)
+{
+ return -EINVAL;
+}
+
static inline u32
qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn,
enum BAR_ID bar_id)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 6687e04d1558..43569b1839be 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -550,8 +550,7 @@ void qede_force_mac(void *dev, u8 *mac, bool forced)
__qede_lock(edev);
- /* MAC hints take effect only if we haven't set one already */
- if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced) {
+ if (!is_valid_ether_addr(mac)) {
__qede_unlock(edev);
return;
}
@@ -1161,6 +1160,10 @@ int qede_set_mac_addr(struct net_device *ndev, void *p)
if (edev->state != QEDE_STATE_OPEN) {
DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
"The device is currently down\n");
+ /* Ask PF to explicitly update a copy in bulletin board */
+ if (IS_VF(edev) && edev->ops->req_bulletin_update_mac)
+ edev->ops->req_bulletin_update_mac(edev->cdev,
+ ndev->dev_addr);
goto out;
}
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 604ae78381ae..6d99b141a7aa 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -84,12 +84,11 @@
The RTL chips use a 64 element hash table based on the Ethernet CRC. */
static const int multicast_filter_limit = 32;
-#define MAX_READ_REQUEST_SHIFT 12
#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */
#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */
#define R8169_REGS_SIZE 256
-#define R8169_NAPI_WEIGHT 64
+#define R8169_RX_BUF_SIZE (SZ_16K - 1)
#define NUM_TX_DESC 64 /* Number of Tx descriptor registers */
#define NUM_RX_DESC 256U /* Number of Rx descriptor registers */
#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
@@ -172,12 +171,11 @@ enum rtl_tx_desc_version {
#define JUMBO_7K (7*1024 - ETH_HLEN - 2)
#define JUMBO_9K (9*1024 - ETH_HLEN - 2)
-#define _R(NAME,TD,FW,SZ,B) { \
+#define _R(NAME,TD,FW,SZ) { \
.name = NAME, \
.txd_version = TD, \
.fw_name = FW, \
.jumbo_max = SZ, \
- .jumbo_tx_csum = B \
}
static const struct {
@@ -185,135 +183,111 @@ static const struct {
enum rtl_tx_desc_version txd_version;
const char *fw_name;
u16 jumbo_max;
- bool jumbo_tx_csum;
} rtl_chip_infos[] = {
/* PCI devices. */
[RTL_GIGA_MAC_VER_01] =
- _R("RTL8169", RTL_TD_0, NULL, JUMBO_7K, true),
+ _R("RTL8169", RTL_TD_0, NULL, JUMBO_7K),
[RTL_GIGA_MAC_VER_02] =
- _R("RTL8169s", RTL_TD_0, NULL, JUMBO_7K, true),
+ _R("RTL8169s", RTL_TD_0, NULL, JUMBO_7K),
[RTL_GIGA_MAC_VER_03] =
- _R("RTL8110s", RTL_TD_0, NULL, JUMBO_7K, true),
+ _R("RTL8110s", RTL_TD_0, NULL, JUMBO_7K),
[RTL_GIGA_MAC_VER_04] =
- _R("RTL8169sb/8110sb", RTL_TD_0, NULL, JUMBO_7K, true),
+ _R("RTL8169sb/8110sb", RTL_TD_0, NULL, JUMBO_7K),
[RTL_GIGA_MAC_VER_05] =
- _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K, true),
+ _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K),
[RTL_GIGA_MAC_VER_06] =
- _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K, true),
+ _R("RTL8169sc/8110sc", RTL_TD_0, NULL, JUMBO_7K),
/* PCI-E devices. */
[RTL_GIGA_MAC_VER_07] =
- _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true),
+ _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_08] =
- _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true),
+ _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_09] =
- _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K, true),
+ _R("RTL8102e", RTL_TD_1, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_10] =
- _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true),
+ _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_11] =
- _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false),
+ _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K),
[RTL_GIGA_MAC_VER_12] =
- _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false),
+ _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K),
[RTL_GIGA_MAC_VER_13] =
- _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true),
+ _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_14] =
- _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K, true),
+ _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_15] =
- _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K, true),
+ _R("RTL8100e", RTL_TD_0, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_16] =
- _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true),
+ _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K),
[RTL_GIGA_MAC_VER_17] =
- _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false),
+ _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K),
[RTL_GIGA_MAC_VER_18] =
- _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_19] =
- _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_20] =
- _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_21] =
- _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_22] =
- _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168c/8111c", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_23] =
- _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_24] =
- _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false),
+ _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K),
[RTL_GIGA_MAC_VER_25] =
- _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_1,
- JUMBO_9K, false),
+ _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_1, JUMBO_9K),
[RTL_GIGA_MAC_VER_26] =
- _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_2,
- JUMBO_9K, false),
+ _R("RTL8168d/8111d", RTL_TD_1, FIRMWARE_8168D_2, JUMBO_9K),
[RTL_GIGA_MAC_VER_27] =
- _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false),
+ _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K),
[RTL_GIGA_MAC_VER_28] =
- _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false),
+ _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K),
[RTL_GIGA_MAC_VER_29] =
- _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1,
- JUMBO_1K, true),
+ _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K),
[RTL_GIGA_MAC_VER_30] =
- _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1,
- JUMBO_1K, true),
+ _R("RTL8105e", RTL_TD_1, FIRMWARE_8105E_1, JUMBO_1K),
[RTL_GIGA_MAC_VER_31] =
- _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K, false),
+ _R("RTL8168dp/8111dp", RTL_TD_1, NULL, JUMBO_9K),
[RTL_GIGA_MAC_VER_32] =
- _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_1,
- JUMBO_9K, false),
+ _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_1, JUMBO_9K),
[RTL_GIGA_MAC_VER_33] =
- _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_2,
- JUMBO_9K, false),
+ _R("RTL8168e/8111e", RTL_TD_1, FIRMWARE_8168E_2, JUMBO_9K),
[RTL_GIGA_MAC_VER_34] =
- _R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3,
- JUMBO_9K, false),
+ _R("RTL8168evl/8111evl",RTL_TD_1, FIRMWARE_8168E_3, JUMBO_9K),
[RTL_GIGA_MAC_VER_35] =
- _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_1,
- JUMBO_9K, false),
+ _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_1, JUMBO_9K),
[RTL_GIGA_MAC_VER_36] =
- _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_2,
- JUMBO_9K, false),
+ _R("RTL8168f/8111f", RTL_TD_1, FIRMWARE_8168F_2, JUMBO_9K),
[RTL_GIGA_MAC_VER_37] =
- _R("RTL8402", RTL_TD_1, FIRMWARE_8402_1,
- JUMBO_1K, true),
+ _R("RTL8402", RTL_TD_1, FIRMWARE_8402_1, JUMBO_1K),
[RTL_GIGA_MAC_VER_38] =
- _R("RTL8411", RTL_TD_1, FIRMWARE_8411_1,
- JUMBO_9K, false),
+ _R("RTL8411", RTL_TD_1, FIRMWARE_8411_1, JUMBO_9K),
[RTL_GIGA_MAC_VER_39] =
- _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1,
- JUMBO_1K, true),
+ _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1, JUMBO_1K),
[RTL_GIGA_MAC_VER_40] =
- _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2,
- JUMBO_9K, false),
+ _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2, JUMBO_9K),
[RTL_GIGA_MAC_VER_41] =
- _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K, false),
+ _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K),
[RTL_GIGA_MAC_VER_42] =
- _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3,
- JUMBO_9K, false),
+ _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3, JUMBO_9K),
[RTL_GIGA_MAC_VER_43] =
- _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2,
- JUMBO_1K, true),
+ _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2, JUMBO_1K),
[RTL_GIGA_MAC_VER_44] =
- _R("RTL8411", RTL_TD_1, FIRMWARE_8411_2,
- JUMBO_9K, false),
+ _R("RTL8411", RTL_TD_1, FIRMWARE_8411_2, JUMBO_9K),
[RTL_GIGA_MAC_VER_45] =
- _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_1,
- JUMBO_9K, false),
+ _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_1, JUMBO_9K),
[RTL_GIGA_MAC_VER_46] =
- _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_2,
- JUMBO_9K, false),
+ _R("RTL8168h/8111h", RTL_TD_1, FIRMWARE_8168H_2, JUMBO_9K),
[RTL_GIGA_MAC_VER_47] =
- _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_1,
- JUMBO_1K, false),
+ _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_1, JUMBO_1K),
[RTL_GIGA_MAC_VER_48] =
- _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_2,
- JUMBO_1K, false),
+ _R("RTL8107e", RTL_TD_1, FIRMWARE_8107E_2, JUMBO_1K),
[RTL_GIGA_MAC_VER_49] =
- _R("RTL8168ep/8111ep", RTL_TD_1, NULL,
- JUMBO_9K, false),
+ _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K),
[RTL_GIGA_MAC_VER_50] =
- _R("RTL8168ep/8111ep", RTL_TD_1, NULL,
- JUMBO_9K, false),
+ _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K),
[RTL_GIGA_MAC_VER_51] =
- _R("RTL8168ep/8111ep", RTL_TD_1, NULL,
- JUMBO_9K, false),
+ _R("RTL8168ep/8111ep", RTL_TD_1, NULL, JUMBO_9K),
};
#undef _R
@@ -345,7 +319,6 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
-static int rx_buf_sz = 16383;
static int use_dac = -1;
static struct {
u32 msg_enable;
@@ -437,13 +410,8 @@ enum rtl8168_8101_registers {
CSIAR = 0x68,
#define CSIAR_FLAG 0x80000000
#define CSIAR_WRITE_CMD 0x80000000
-#define CSIAR_BYTE_ENABLE 0x0f
-#define CSIAR_BYTE_ENABLE_SHIFT 12
-#define CSIAR_ADDR_MASK 0x0fff
-#define CSIAR_FUNC_CARD 0x00000000
-#define CSIAR_FUNC_SDIO 0x00010000
-#define CSIAR_FUNC_NIC 0x00020000
-#define CSIAR_FUNC_NIC2 0x00010000
+#define CSIAR_BYTE_ENABLE 0x0000f000
+#define CSIAR_ADDR_MASK 0x00000fff
PMCH = 0x6f,
EPHYAR = 0x80,
#define EPHYAR_FLAG 0x80000000
@@ -626,6 +594,7 @@ enum rtl_register_content {
RxChkSum = (1 << 5),
PCIDAC = (1 << 4),
PCIMulRW = (1 << 3),
+#define INTT_MASK GENMASK(1, 0)
INTT_0 = 0x0000, // 8168
INTT_1 = 0x0001, // 8168
INTT_2 = 0x0002, // 8168
@@ -716,6 +685,7 @@ enum rtl_rx_desc_bit {
};
#define RsvdMask 0x3fffc000
+#define CPCMD_QUIRK_MASK (Normal_mode | RxVlan | RxChkSum | INTT_MASK)
struct TxDesc {
__le32 opts1;
@@ -778,7 +748,6 @@ struct rtl8169_private {
struct net_device *dev;
struct napi_struct napi;
u32 msg_enable;
- u16 txd_version;
u16 mac_version;
u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
@@ -802,26 +771,16 @@ struct rtl8169_private {
int (*read)(struct rtl8169_private *, int);
} mdio_ops;
- struct pll_power_ops {
- void (*down)(struct rtl8169_private *);
- void (*up)(struct rtl8169_private *);
- } pll_power_ops;
-
struct jumbo_ops {
void (*enable)(struct rtl8169_private *);
void (*disable)(struct rtl8169_private *);
} jumbo_ops;
- struct csi_ops {
- void (*write)(struct rtl8169_private *, int, int);
- u32 (*read)(struct rtl8169_private *, int);
- } csi_ops;
-
int (*set_speed)(struct net_device *, u8 aneg, u16 sp, u8 dpx, u32 adv);
int (*get_link_ksettings)(struct net_device *,
struct ethtool_link_ksettings *);
void (*phy_reset_enable)(struct rtl8169_private *tp);
- void (*hw_start)(struct net_device *);
+ void (*hw_start)(struct rtl8169_private *tp);
unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
unsigned int (*link_ok)(struct rtl8169_private *tp);
int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
@@ -833,14 +792,11 @@ struct rtl8169_private {
struct work_struct work;
} wk;
- unsigned features;
-
struct mii_if_info mii;
dma_addr_t counters_phys_addr;
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
u32 saved_wolopts;
- u32 opts1_mask;
struct rtl_fw {
const struct firmware *fw;
@@ -1645,23 +1601,8 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
if (options & LinkUp)
wolopts |= WAKE_PHY;
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
wolopts |= WAKE_MAGIC;
break;
@@ -1722,23 +1663,8 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
tmp = ARRAY_SIZE(cfg) - 1;
if (wolopts & WAKE_MAGIC)
rtl_w0w1_eri(tp,
@@ -1960,18 +1886,20 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
features &= ~NETIF_F_ALL_TSO;
if (dev->mtu > JUMBO_1K &&
- !rtl_chip_infos[tp->mac_version].jumbo_tx_csum)
+ tp->mac_version > RTL_GIGA_MAC_VER_06)
features &= ~NETIF_F_IP_CSUM;
return features;
}
-static void __rtl8169_set_features(struct net_device *dev,
- netdev_features_t features)
+static int rtl8169_set_features(struct net_device *dev,
+ netdev_features_t features)
{
struct rtl8169_private *tp = netdev_priv(dev);
u32 rx_config;
+ rtl_lock_work(tp);
+
rx_config = RTL_R32(tp, RxConfig);
if (features & NETIF_F_RXALL)
rx_config |= (AcceptErr | AcceptRunt);
@@ -1990,28 +1918,14 @@ static void __rtl8169_set_features(struct net_device *dev,
else
tp->cp_cmd &= ~RxVlan;
- tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
-
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
RTL_R16(tp, CPlusCmd);
-}
-static int rtl8169_set_features(struct net_device *dev,
- netdev_features_t features)
-{
- struct rtl8169_private *tp = netdev_priv(dev);
-
- features &= NETIF_F_RXALL | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX;
-
- rtl_lock_work(tp);
- if (features ^ dev->features)
- __rtl8169_set_features(dev, features);
rtl_unlock_work(tp);
return 0;
}
-
static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb)
{
return (skb_vlan_tag_present(skb)) ?
@@ -2155,9 +2069,8 @@ DECLARE_RTL_COND(rtl_counters_cond)
return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
}
-static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
+static bool rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
{
- struct rtl8169_private *tp = netdev_priv(dev);
dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd;
@@ -2170,10 +2083,8 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
}
-static bool rtl8169_reset_counters(struct net_device *dev)
+static bool rtl8169_reset_counters(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
-
/*
* Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the
* tally counters.
@@ -2181,13 +2092,11 @@ static bool rtl8169_reset_counters(struct net_device *dev)
if (tp->mac_version < RTL_GIGA_MAC_VER_19)
return true;
- return rtl8169_do_counters(dev, CounterReset);
+ return rtl8169_do_counters(tp, CounterReset);
}
-static bool rtl8169_update_counters(struct net_device *dev)
+static bool rtl8169_update_counters(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
-
/*
* Some chips are unable to dump tally counters when the receiver
* is disabled.
@@ -2195,12 +2104,11 @@ static bool rtl8169_update_counters(struct net_device *dev)
if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
return true;
- return rtl8169_do_counters(dev, CounterDump);
+ return rtl8169_do_counters(tp, CounterDump);
}
-static bool rtl8169_init_counter_offsets(struct net_device *dev)
+static bool rtl8169_init_counter_offsets(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
struct rtl8169_counters *counters = tp->counters;
bool ret = false;
@@ -2223,10 +2131,10 @@ static bool rtl8169_init_counter_offsets(struct net_device *dev)
return true;
/* If both, reset and update fail, propagate to caller. */
- if (rtl8169_reset_counters(dev))
+ if (rtl8169_reset_counters(tp))
ret = true;
- if (rtl8169_update_counters(dev))
+ if (rtl8169_update_counters(tp))
ret = true;
tp->tc_offset.tx_errors = counters->tx_errors;
@@ -2249,7 +2157,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
pm_runtime_get_noresume(d);
if (pm_runtime_active(d))
- rtl8169_update_counters(dev);
+ rtl8169_update_counters(tp);
pm_runtime_put_noidle(d);
@@ -2391,7 +2299,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
if (IS_ERR(ci))
return PTR_ERR(ci);
- scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
+ scale = &ci->scalev[tp->cp_cmd & INTT_MASK];
/* read IntrMitigate and adjust according to scale */
for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
@@ -2490,7 +2398,7 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
RTL_W16(tp, IntrMitigate, swab16(w));
- tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
+ tp->cp_cmd = (tp->cp_cmd & ~INTT_MASK) | cp01;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
RTL_R16(tp, CPlusCmd);
@@ -2520,7 +2428,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
};
static void rtl8169_get_mac_version(struct rtl8169_private *tp,
- struct net_device *dev, u8 default_version)
+ u8 default_version)
{
/*
* The driver currently handles the 8168Bf and the 8168Be identically
@@ -2560,12 +2468,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
/* 8168E family. */
{ 0x7c800000, 0x2c800000, RTL_GIGA_MAC_VER_34 },
- { 0x7cf00000, 0x2c200000, RTL_GIGA_MAC_VER_33 },
{ 0x7cf00000, 0x2c100000, RTL_GIGA_MAC_VER_32 },
{ 0x7c800000, 0x2c000000, RTL_GIGA_MAC_VER_33 },
/* 8168D family. */
- { 0x7cf00000, 0x28300000, RTL_GIGA_MAC_VER_26 },
{ 0x7cf00000, 0x28100000, RTL_GIGA_MAC_VER_25 },
{ 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_26 },
@@ -2575,32 +2481,24 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
{ 0x7cf00000, 0x28b00000, RTL_GIGA_MAC_VER_31 },
/* 8168C family. */
- { 0x7cf00000, 0x3cb00000, RTL_GIGA_MAC_VER_24 },
{ 0x7cf00000, 0x3c900000, RTL_GIGA_MAC_VER_23 },
{ 0x7cf00000, 0x3c800000, RTL_GIGA_MAC_VER_18 },
{ 0x7c800000, 0x3c800000, RTL_GIGA_MAC_VER_24 },
{ 0x7cf00000, 0x3c000000, RTL_GIGA_MAC_VER_19 },
{ 0x7cf00000, 0x3c200000, RTL_GIGA_MAC_VER_20 },
{ 0x7cf00000, 0x3c300000, RTL_GIGA_MAC_VER_21 },
- { 0x7cf00000, 0x3c400000, RTL_GIGA_MAC_VER_22 },
{ 0x7c800000, 0x3c000000, RTL_GIGA_MAC_VER_22 },
/* 8168B family. */
{ 0x7cf00000, 0x38000000, RTL_GIGA_MAC_VER_12 },
- { 0x7cf00000, 0x38500000, RTL_GIGA_MAC_VER_17 },
{ 0x7c800000, 0x38000000, RTL_GIGA_MAC_VER_17 },
{ 0x7c800000, 0x30000000, RTL_GIGA_MAC_VER_11 },
/* 8101 family. */
- { 0x7cf00000, 0x44900000, RTL_GIGA_MAC_VER_39 },
{ 0x7c800000, 0x44800000, RTL_GIGA_MAC_VER_39 },
{ 0x7c800000, 0x44000000, RTL_GIGA_MAC_VER_37 },
- { 0x7cf00000, 0x40b00000, RTL_GIGA_MAC_VER_30 },
- { 0x7cf00000, 0x40a00000, RTL_GIGA_MAC_VER_30 },
{ 0x7cf00000, 0x40900000, RTL_GIGA_MAC_VER_29 },
{ 0x7c800000, 0x40800000, RTL_GIGA_MAC_VER_30 },
- { 0x7cf00000, 0x34a00000, RTL_GIGA_MAC_VER_09 },
- { 0x7cf00000, 0x24a00000, RTL_GIGA_MAC_VER_09 },
{ 0x7cf00000, 0x34900000, RTL_GIGA_MAC_VER_08 },
{ 0x7cf00000, 0x24900000, RTL_GIGA_MAC_VER_08 },
{ 0x7cf00000, 0x34800000, RTL_GIGA_MAC_VER_07 },
@@ -2635,8 +2533,8 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
tp->mac_version = p->mac_version;
if (tp->mac_version == RTL_GIGA_MAC_NONE) {
- netif_notice(tp, probe, dev,
- "unknown MAC, using family default\n");
+ dev_notice(tp_to_dev(tp),
+ "unknown MAC, using family default\n");
tp->mac_version = default_version;
} else if (tp->mac_version == RTL_GIGA_MAC_VER_42) {
tp->mac_version = tp->mii.supports_gmii ?
@@ -4685,18 +4583,7 @@ static void rtl_init_mdio_ops(struct rtl8169_private *tp)
ops->write = r8168dp_2_mdio_write;
ops->read = r8168dp_2_mdio_read;
break;
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
ops->write = r8168g_mdio_write;
ops->read = r8168g_mdio_read;
break;
@@ -4741,21 +4628,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_32:
case RTL_GIGA_MAC_VER_33:
case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_39:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_51:
RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
break;
@@ -4775,79 +4648,13 @@ static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
return true;
}
-static void r810x_phy_power_down(struct rtl8169_private *tp)
-{
- rtl_writephy(tp, 0x1f, 0x0000);
- rtl_writephy(tp, MII_BMCR, BMCR_PDOWN);
-}
-
-static void r810x_phy_power_up(struct rtl8169_private *tp)
-{
- rtl_writephy(tp, 0x1f, 0x0000);
- rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE);
-}
-
-static void r810x_pll_power_down(struct rtl8169_private *tp)
-{
- if (rtl_wol_pll_power_down(tp))
- return;
-
- r810x_phy_power_down(tp);
-
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_07:
- case RTL_GIGA_MAC_VER_08:
- case RTL_GIGA_MAC_VER_09:
- case RTL_GIGA_MAC_VER_10:
- case RTL_GIGA_MAC_VER_13:
- case RTL_GIGA_MAC_VER_16:
- break;
- default:
- RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
- break;
- }
-}
-
-static void r810x_pll_power_up(struct rtl8169_private *tp)
-{
- r810x_phy_power_up(tp);
-
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_07:
- case RTL_GIGA_MAC_VER_08:
- case RTL_GIGA_MAC_VER_09:
- case RTL_GIGA_MAC_VER_10:
- case RTL_GIGA_MAC_VER_13:
- case RTL_GIGA_MAC_VER_16:
- break;
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
- break;
- default:
- RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
- break;
- }
-}
-
static void r8168_phy_power_up(struct rtl8169_private *tp)
{
rtl_writephy(tp, 0x1f, 0x0000);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_17:
- case RTL_GIGA_MAC_VER_18:
- case RTL_GIGA_MAC_VER_19:
- case RTL_GIGA_MAC_VER_20:
- case RTL_GIGA_MAC_VER_21:
- case RTL_GIGA_MAC_VER_22:
- case RTL_GIGA_MAC_VER_23:
- case RTL_GIGA_MAC_VER_24:
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- case RTL_GIGA_MAC_VER_28:
+ case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
rtl_writephy(tp, 0x0e, 0x0000);
break;
@@ -4870,18 +4677,7 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_11:
case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_17:
- case RTL_GIGA_MAC_VER_18:
- case RTL_GIGA_MAC_VER_19:
- case RTL_GIGA_MAC_VER_20:
- case RTL_GIGA_MAC_VER_21:
- case RTL_GIGA_MAC_VER_22:
- case RTL_GIGA_MAC_VER_23:
- case RTL_GIGA_MAC_VER_24:
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- case RTL_GIGA_MAC_VER_28:
+ case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
rtl_writephy(tp, 0x0e, 0x0200);
default:
@@ -4895,12 +4691,6 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
if (r8168_check_dash(tp))
return;
- if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
- tp->mac_version == RTL_GIGA_MAC_VER_24) &&
- (RTL_R16(tp, CPlusCmd) & ASF)) {
- return;
- }
-
if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
tp->mac_version == RTL_GIGA_MAC_VER_33)
rtl_ephy_write(tp, 0x19, 0xff64);
@@ -4911,16 +4701,15 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
r8168_phy_power_down(tp);
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- case RTL_GIGA_MAC_VER_28:
- case RTL_GIGA_MAC_VER_31:
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
+ case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
+ case RTL_GIGA_MAC_VER_37:
+ case RTL_GIGA_MAC_VER_39:
+ case RTL_GIGA_MAC_VER_43:
case RTL_GIGA_MAC_VER_44:
case RTL_GIGA_MAC_VER_45:
case RTL_GIGA_MAC_VER_46:
+ case RTL_GIGA_MAC_VER_47:
+ case RTL_GIGA_MAC_VER_48:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
@@ -4938,18 +4727,17 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
static void r8168_pll_power_up(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- case RTL_GIGA_MAC_VER_28:
- case RTL_GIGA_MAC_VER_31:
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
+ case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
+ case RTL_GIGA_MAC_VER_37:
+ case RTL_GIGA_MAC_VER_39:
+ case RTL_GIGA_MAC_VER_43:
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
break;
case RTL_GIGA_MAC_VER_44:
case RTL_GIGA_MAC_VER_45:
case RTL_GIGA_MAC_VER_46:
+ case RTL_GIGA_MAC_VER_47:
+ case RTL_GIGA_MAC_VER_48:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
@@ -4966,127 +4754,41 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
r8168_phy_power_up(tp);
}
-static void rtl_generic_op(struct rtl8169_private *tp,
- void (*op)(struct rtl8169_private *))
-{
- if (op)
- op(tp);
-}
-
static void rtl_pll_power_down(struct rtl8169_private *tp)
{
- rtl_generic_op(tp, tp->pll_power_ops.down);
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
+ break;
+ default:
+ r8168_pll_power_down(tp);
+ }
}
static void rtl_pll_power_up(struct rtl8169_private *tp)
{
- rtl_generic_op(tp, tp->pll_power_ops.up);
-}
-
-static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
-{
- struct pll_power_ops *ops = &tp->pll_power_ops;
-
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_07:
- case RTL_GIGA_MAC_VER_08:
- case RTL_GIGA_MAC_VER_09:
- case RTL_GIGA_MAC_VER_10:
- case RTL_GIGA_MAC_VER_16:
- case RTL_GIGA_MAC_VER_29:
- case RTL_GIGA_MAC_VER_30:
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_39:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- ops->down = r810x_pll_power_down;
- ops->up = r810x_pll_power_up;
- break;
-
- case RTL_GIGA_MAC_VER_11:
- case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_17:
- case RTL_GIGA_MAC_VER_18:
- case RTL_GIGA_MAC_VER_19:
- case RTL_GIGA_MAC_VER_20:
- case RTL_GIGA_MAC_VER_21:
- case RTL_GIGA_MAC_VER_22:
- case RTL_GIGA_MAC_VER_23:
- case RTL_GIGA_MAC_VER_24:
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- case RTL_GIGA_MAC_VER_28:
- case RTL_GIGA_MAC_VER_31:
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
- case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- case RTL_GIGA_MAC_VER_38:
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
- ops->down = r8168_pll_power_down;
- ops->up = r8168_pll_power_up;
+ case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
break;
-
default:
- ops->down = NULL;
- ops->up = NULL;
- break;
+ r8168_pll_power_up(tp);
}
}
static void rtl_init_rxcfg(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01:
- case RTL_GIGA_MAC_VER_02:
- case RTL_GIGA_MAC_VER_03:
- case RTL_GIGA_MAC_VER_04:
- case RTL_GIGA_MAC_VER_05:
- case RTL_GIGA_MAC_VER_06:
- case RTL_GIGA_MAC_VER_10:
- case RTL_GIGA_MAC_VER_11:
- case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_13:
- case RTL_GIGA_MAC_VER_14:
- case RTL_GIGA_MAC_VER_15:
- case RTL_GIGA_MAC_VER_16:
- case RTL_GIGA_MAC_VER_17:
+ case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
break;
- case RTL_GIGA_MAC_VER_18:
- case RTL_GIGA_MAC_VER_19:
- case RTL_GIGA_MAC_VER_20:
- case RTL_GIGA_MAC_VER_21:
- case RTL_GIGA_MAC_VER_22:
- case RTL_GIGA_MAC_VER_23:
- case RTL_GIGA_MAC_VER_24:
+ case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24:
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_35:
RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
break;
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
break;
default:
@@ -5102,16 +4804,20 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
{
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- rtl_generic_op(tp, tp->jumbo_ops.enable);
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ if (tp->jumbo_ops.enable) {
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ tp->jumbo_ops.enable(tp);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ }
}
static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
{
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- rtl_generic_op(tp, tp->jumbo_ops.disable);
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ if (tp->jumbo_ops.disable) {
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ tp->jumbo_ops.disable(tp);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ }
}
static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5125,7 +4831,7 @@ static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
{
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
}
static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5151,7 +4857,7 @@ static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
RTL_W8(tp, MaxTxPacketSize, 0x0c);
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
}
static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5163,7 +4869,7 @@ static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
{
rtl_tx_performance_tweak(tp,
- (0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
+ PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN);
}
static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5223,18 +4929,7 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
* No action needed for jumbo frames with 8169.
* No jumbo for 810x at all.
*/
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
default:
ops->disable = NULL;
ops->enable = NULL;
@@ -5320,32 +5015,21 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
rtl_rx_close(tp);
- if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
- tp->mac_version == RTL_GIGA_MAC_VER_28 ||
- tp->mac_version == RTL_GIGA_MAC_VER_31) {
+ switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_27:
+ case RTL_GIGA_MAC_VER_28:
+ case RTL_GIGA_MAC_VER_31:
rtl_udelay_loop_wait_low(tp, &rtl_npq_cond, 20, 42*42);
- } else if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
- tp->mac_version == RTL_GIGA_MAC_VER_35 ||
- tp->mac_version == RTL_GIGA_MAC_VER_36 ||
- tp->mac_version == RTL_GIGA_MAC_VER_37 ||
- tp->mac_version == RTL_GIGA_MAC_VER_38 ||
- tp->mac_version == RTL_GIGA_MAC_VER_40 ||
- tp->mac_version == RTL_GIGA_MAC_VER_41 ||
- tp->mac_version == RTL_GIGA_MAC_VER_42 ||
- tp->mac_version == RTL_GIGA_MAC_VER_43 ||
- tp->mac_version == RTL_GIGA_MAC_VER_44 ||
- tp->mac_version == RTL_GIGA_MAC_VER_45 ||
- tp->mac_version == RTL_GIGA_MAC_VER_46 ||
- tp->mac_version == RTL_GIGA_MAC_VER_47 ||
- tp->mac_version == RTL_GIGA_MAC_VER_48 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) {
+ break;
+ case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
- } else {
+ break;
+ default:
RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
udelay(100);
+ break;
}
rtl_hw_reset(tp);
@@ -5358,13 +5042,10 @@ static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
(InterFrameGap << TxInterFrameGapShift));
}
-static void rtl_hw_start(struct net_device *dev)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
-
- tp->hw_start(dev);
-
- rtl_irq_enable_all(tp);
+ /* Low hurts. Let's disable the filtering. */
+ RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1);
}
static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
@@ -5380,19 +5061,23 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
}
-static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
+static void rtl_hw_start(struct rtl8169_private *tp)
{
- u16 cmd;
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- cmd = RTL_R16(tp, CPlusCmd);
- RTL_W16(tp, CPlusCmd, cmd);
- return cmd;
-}
+ tp->hw_start(tp);
-static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
-{
- /* Low hurts. Let's disable the filtering. */
- RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
+ rtl_set_rx_max_size(tp);
+ rtl_set_rx_tx_desc_registers(tp);
+ rtl_set_rx_tx_config_registers(tp);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+
+ /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
+ RTL_R8(tp, IntrMask);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
+ /* no early-rx interrupts */
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
+ rtl_irq_enable_all(tp);
}
static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
@@ -5472,36 +5157,14 @@ static void rtl_set_rx_mode(struct net_device *dev)
RTL_W32(tp, RxConfig, tmp);
}
-static void rtl_hw_start_8169(struct net_device *dev)
+static void rtl_hw_start_8169(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
- struct pci_dev *pdev = tp->pci_dev;
-
- if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
- RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
- pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
- }
-
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
- tp->mac_version == RTL_GIGA_MAC_VER_02 ||
- tp->mac_version == RTL_GIGA_MAC_VER_03 ||
- tp->mac_version == RTL_GIGA_MAC_VER_04)
- RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-
- rtl_init_rxcfg(tp);
+ if (tp->mac_version == RTL_GIGA_MAC_VER_05)
+ pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
RTL_W8(tp, EarlyTxThres, NoEarlyTx);
- rtl_set_rx_max_size(tp, rx_buf_sz);
-
- if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
- tp->mac_version == RTL_GIGA_MAC_VER_02 ||
- tp->mac_version == RTL_GIGA_MAC_VER_03 ||
- tp->mac_version == RTL_GIGA_MAC_VER_04)
- rtl_set_rx_tx_config_registers(tp);
-
- tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
+ tp->cp_cmd |= PCIMulRW;
if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5520,56 +5183,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
*/
RTL_W16(tp, IntrMitigate, 0x0000);
- rtl_set_rx_tx_desc_registers(tp);
-
- if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
- tp->mac_version != RTL_GIGA_MAC_VER_02 &&
- tp->mac_version != RTL_GIGA_MAC_VER_03 &&
- tp->mac_version != RTL_GIGA_MAC_VER_04) {
- RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
- rtl_set_rx_tx_config_registers(tp);
- }
-
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
- /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
- RTL_R8(tp, IntrMask);
-
RTL_W32(tp, RxMissed, 0);
-
- rtl_set_rx_mode(dev);
-
- /* no early-rx interrupts */
- RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
-}
-
-static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
-{
- if (tp->csi_ops.write)
- tp->csi_ops.write(tp, addr, value);
-}
-
-static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
-{
- return tp->csi_ops.read ? tp->csi_ops.read(tp, addr) : ~0;
-}
-
-static void rtl_csi_access_enable(struct rtl8169_private *tp, u32 bits)
-{
- u32 csi;
-
- csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff;
- rtl_csi_write(tp, 0x070c, csi | bits);
-}
-
-static void rtl_csi_access_enable_1(struct rtl8169_private *tp)
-{
- rtl_csi_access_enable(tp, 0x17000000);
-}
-
-static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
-{
- rtl_csi_access_enable(tp, 0x27000000);
}
DECLARE_RTL_COND(rtl_csiar_cond)
@@ -5577,101 +5191,55 @@ DECLARE_RTL_COND(rtl_csiar_cond)
return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
}
-static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
-{
- RTL_W32(tp, CSIDR, value);
- RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
- CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
-
- rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
-}
-
-static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
+static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
{
- RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
- CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
+ u32 func = PCI_FUNC(tp->pci_dev->devfn);
- return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(tp, CSIDR) : ~0;
-}
-
-static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
-{
RTL_W32(tp, CSIDR, value);
RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
- CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
- CSIAR_FUNC_NIC);
+ CSIAR_BYTE_ENABLE | func << 16);
rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
}
-static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
+static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
{
- RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
- CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
+ u32 func = PCI_FUNC(tp->pci_dev->devfn);
+
+ RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | func << 16 |
+ CSIAR_BYTE_ENABLE);
return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
RTL_R32(tp, CSIDR) : ~0;
}
-static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
+static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
{
- RTL_W32(tp, CSIDR, value);
- RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
- CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
- CSIAR_FUNC_NIC2);
+ struct pci_dev *pdev = tp->pci_dev;
+ u32 csi;
- rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
+ /* According to Realtek the value at config space address 0x070f
+ * controls the L0s/L1 entrance latency. We try standard ECAM access
+ * first and if it fails fall back to CSI.
+ */
+ if (pdev->cfg_size > 0x070f &&
+ pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL)
+ return;
+
+ netdev_notice_once(tp->dev,
+ "No native access to PCI extended config space, falling back to CSI\n");
+ csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff;
+ rtl_csi_write(tp, 0x070c, csi | val << 24);
}
-static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
+static void rtl_csi_access_enable_1(struct rtl8169_private *tp)
{
- RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
- CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
-
- return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
- RTL_R32(tp, CSIDR) : ~0;
+ rtl_csi_access_enable(tp, 0x17);
}
-static void rtl_init_csi_ops(struct rtl8169_private *tp)
+static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
{
- struct csi_ops *ops = &tp->csi_ops;
-
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01:
- case RTL_GIGA_MAC_VER_02:
- case RTL_GIGA_MAC_VER_03:
- case RTL_GIGA_MAC_VER_04:
- case RTL_GIGA_MAC_VER_05:
- case RTL_GIGA_MAC_VER_06:
- case RTL_GIGA_MAC_VER_10:
- case RTL_GIGA_MAC_VER_11:
- case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_13:
- case RTL_GIGA_MAC_VER_14:
- case RTL_GIGA_MAC_VER_15:
- case RTL_GIGA_MAC_VER_16:
- case RTL_GIGA_MAC_VER_17:
- ops->write = NULL;
- ops->read = NULL;
- break;
-
- case RTL_GIGA_MAC_VER_37:
- case RTL_GIGA_MAC_VER_38:
- ops->write = r8402_csi_write;
- ops->read = r8402_csi_read;
- break;
-
- case RTL_GIGA_MAC_VER_44:
- ops->write = r8411_csi_write;
- ops->read = r8411_csi_read;
- break;
-
- default:
- ops->write = r8169_csi_write;
- ops->read = r8169_csi_read;
- break;
- }
+ rtl_csi_access_enable(tp, 0x27);
}
struct ephy_info {
@@ -5718,25 +5286,15 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
RTL_W8(tp, Config3, data);
}
-#define R8168_CPCMD_QUIRK_MASK (\
- EnableBist | \
- Mac_dbgo_oe | \
- Force_half_dup | \
- Force_rxflow_en | \
- Force_txflow_en | \
- Cxpl_dbg_sel | \
- ASF | \
- PktCntrDisable | \
- Mac_dbgo_sel)
-
static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
{
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ tp->cp_cmd &= CPCMD_QUIRK_MASK;
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
if (tp->dev->mtu <= ETH_DATA_LEN) {
- rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) |
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
PCI_EXP_DEVCTL_NOSNOOP_EN);
}
}
@@ -5757,11 +5315,12 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_disable_clock_request(tp);
- RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ tp->cp_cmd &= CPCMD_QUIRK_MASK;
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
}
static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5788,9 +5347,10 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
- RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ tp->cp_cmd &= CPCMD_QUIRK_MASK;
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
}
static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
@@ -5805,9 +5365,10 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
- RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ tp->cp_cmd &= CPCMD_QUIRK_MASK;
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
}
static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
@@ -5862,9 +5423,10 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
- RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+ tp->cp_cmd &= CPCMD_QUIRK_MASK;
+ RTL_W16(tp, CPlusCmd, tp->cp_cmd);
}
static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
@@ -5872,7 +5434,7 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
@@ -5889,7 +5451,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
@@ -5921,7 +5483,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
@@ -5946,7 +5508,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -5976,7 +5538,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
{
rtl_csi_access_enable_2(tp);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6047,7 +5609,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6147,7 +5709,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6229,7 +5791,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_csi_access_enable_1(tp);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6325,18 +5887,12 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
r8168_mac_ocp_write(tp, 0xe860, data);
}
-static void rtl_hw_start_8168(struct net_device *dev)
+static void rtl_hw_start_8168(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
-
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(tp, rx_buf_sz);
-
- tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
-
+ tp->cp_cmd &= ~INTT_MASK;
+ tp->cp_cmd |= PktCntrDisable | INTT_1;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
RTL_W16(tp, IntrMitigate, 0x5151);
@@ -6347,12 +5903,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
tp->event_slow &= ~RxOverflow;
}
- rtl_set_rx_tx_desc_registers(tp);
-
- rtl_set_rx_tx_config_registers(tp);
-
- RTL_R8(tp, IntrMask);
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_11:
rtl_hw_start_8168bb(tp);
@@ -6453,30 +6003,11 @@ static void rtl_hw_start_8168(struct net_device *dev)
default:
printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n",
- dev->name, tp->mac_version);
+ tp->dev->name, tp->mac_version);
break;
}
-
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
- RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-
- rtl_set_rx_mode(dev);
-
- RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
-#define R810X_CPCMD_QUIRK_MASK (\
- EnableBist | \
- Mac_dbgo_oe | \
- Force_half_dup | \
- Force_rxflow_en | \
- Force_txflow_en | \
- Cxpl_dbg_sel | \
- ASF | \
- PktCntrDisable | \
- Mac_dbgo_sel)
-
static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8102e_1[] = {
@@ -6495,7 +6026,7 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
RTL_W8(tp, DBG_REG, FIX_NAK_1);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
RTL_W8(tp, Config1,
LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
@@ -6512,7 +6043,7 @@ static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
{
rtl_csi_access_enable_2(tp);
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -6575,7 +6106,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
- rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
+ rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);
@@ -6600,32 +6131,21 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
rtl_pcie_state_l2l3_enable(tp, false);
}
-static void rtl_hw_start_8101(struct net_device *dev)
+static void rtl_hw_start_8101(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
- struct pci_dev *pdev = tp->pci_dev;
-
if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
tp->event_slow &= ~RxFIFOOver;
if (tp->mac_version == RTL_GIGA_MAC_VER_13 ||
tp->mac_version == RTL_GIGA_MAC_VER_16)
- pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
+ pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
PCI_EXP_DEVCTL_NOSNOOP_EN);
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-
RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
- rtl_set_rx_max_size(tp, rx_buf_sz);
-
- tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
+ tp->cp_cmd &= CPCMD_QUIRK_MASK;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- rtl_set_rx_tx_desc_registers(tp);
-
- rtl_set_rx_tx_config_registers(tp);
-
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_07:
rtl_hw_start_8102e_1(tp);
@@ -6662,17 +6182,7 @@ static void rtl_hw_start_8101(struct net_device *dev)
break;
}
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
RTL_W16(tp, IntrMitigate, 0x0000);
-
- RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
-
- rtl_set_rx_mode(dev);
-
- RTL_R8(tp, IntrMask);
-
- RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
}
static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -6699,29 +6209,22 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
void **data_buff, struct RxDesc *desc)
{
- dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz,
- DMA_FROM_DEVICE);
+ dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr),
+ R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
kfree(*data_buff);
*data_buff = NULL;
rtl8169_make_unusable_by_asic(desc);
}
-static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
+static inline void rtl8169_mark_to_asic(struct RxDesc *desc)
{
u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
/* Force memory writes to complete before releasing descriptor */
dma_wmb();
- desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
-}
-
-static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
- u32 rx_buf_sz)
-{
- desc->addr = cpu_to_le64(mapping);
- rtl8169_mark_to_asic(desc, rx_buf_sz);
+ desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE);
}
static inline void *rtl8169_align(void *data)
@@ -6735,21 +6238,20 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
void *data;
dma_addr_t mapping;
struct device *d = tp_to_dev(tp);
- struct net_device *dev = tp->dev;
- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
+ int node = dev_to_node(d);
- data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node);
+ data = kmalloc_node(R8169_RX_BUF_SIZE, GFP_KERNEL, node);
if (!data)
return NULL;
if (rtl8169_align(data) != data) {
kfree(data);
- data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node);
+ data = kmalloc_node(R8169_RX_BUF_SIZE + 15, GFP_KERNEL, node);
if (!data)
return NULL;
}
- mapping = dma_map_single(d, rtl8169_align(data), rx_buf_sz,
+ mapping = dma_map_single(d, rtl8169_align(data), R8169_RX_BUF_SIZE,
DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(d, mapping))) {
if (net_ratelimit())
@@ -6757,7 +6259,8 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
goto err_out;
}
- rtl8169_map_to_asic(desc, mapping, rx_buf_sz);
+ desc->addr = cpu_to_le64(mapping);
+ rtl8169_mark_to_asic(desc);
return data;
err_out:
@@ -6789,9 +6292,6 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
for (i = 0; i < NUM_RX_DESC; i++) {
void *data;
- if (tp->Rx_databuff[i])
- continue;
-
data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
if (!data) {
rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
@@ -6808,14 +6308,12 @@ err_out:
return -ENOMEM;
}
-static int rtl8169_init_ring(struct net_device *dev)
+static int rtl8169_init_ring(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
-
rtl8169_init_ring_indexes(tp);
- memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info));
- memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *));
+ memset(tp->tx_skb, 0, sizeof(tp->tx_skb));
+ memset(tp->Rx_databuff, 0, sizeof(tp->Rx_databuff));
return rtl8169_rx_fill(tp);
}
@@ -6874,13 +6372,13 @@ static void rtl_reset_work(struct rtl8169_private *tp)
rtl8169_hw_reset(tp);
for (i = 0; i < NUM_RX_DESC; i++)
- rtl8169_mark_to_asic(tp->RxDescArray + i, rx_buf_sz);
+ rtl8169_mark_to_asic(tp->RxDescArray + i);
rtl8169_tx_clear(tp);
rtl8169_init_ring_indexes(tp);
napi_enable(&tp->napi);
- rtl_hw_start(dev);
+ rtl_hw_start(tp);
netif_wake_queue(dev);
rtl8169_check_link_status(dev, tp);
}
@@ -7012,18 +6510,6 @@ static int msdn_giant_send_check(struct sk_buff *skb)
return ret;
}
-static inline __be16 get_protocol(struct sk_buff *skb)
-{
- __be16 protocol;
-
- if (skb->protocol == htons(ETH_P_8021Q))
- protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
- else
- protocol = skb->protocol;
-
- return protocol;
-}
-
static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp,
struct sk_buff *skb, u32 *opts)
{
@@ -7060,7 +6546,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
return false;
}
- switch (get_protocol(skb)) {
+ switch (vlan_get_protocol(skb)) {
case htons(ETH_P_IP):
opts[0] |= TD1_GTSENV4;
break;
@@ -7092,7 +6578,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
return false;
}
- switch (get_protocol(skb)) {
+ switch (vlan_get_protocol(skb)) {
case htons(ETH_P_IP):
opts[1] |= TD1_IPv4_CS;
ip_protocol = ip_hdr(skb)->protocol;
@@ -7362,7 +6848,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
prefetch(data);
skb = napi_alloc_skb(&tp->napi, pkt_size);
if (skb)
- memcpy(skb->data, data, pkt_size);
+ skb_copy_to_linear_data(skb, data, pkt_size);
dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
return skb;
@@ -7380,7 +6866,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
struct RxDesc *desc = tp->RxDescArray + entry;
u32 status;
- status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
+ status = le32_to_cpu(desc->opts1);
if (status & DescOwn)
break;
@@ -7398,14 +6884,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
dev->stats.rx_length_errors++;
if (status & RxCRC)
dev->stats.rx_crc_errors++;
- if (status & RxFOVF) {
+ /* RxFOVF is a reserved bit on later chip versions */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_01 &&
+ status & RxFOVF) {
rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
dev->stats.rx_fifo_errors++;
- }
- if ((status & (RxRUNT | RxCRC)) &&
- !(status & (RxRWT | RxFOVF)) &&
- (dev->features & NETIF_F_RXALL))
+ } else if (status & (RxRUNT | RxCRC) &&
+ !(status & RxRWT) &&
+ dev->features & NETIF_F_RXALL) {
goto process_pkt;
+ }
} else {
struct sk_buff *skb;
dma_addr_t addr;
@@ -7454,7 +6942,7 @@ process_pkt:
}
release_descriptor:
desc->opts2 = 0;
- rtl8169_mark_to_asic(desc, rx_buf_sz);
+ rtl8169_mark_to_asic(desc);
}
count = cur_rx - tp->cur_rx;
@@ -7465,8 +6953,7 @@ release_descriptor:
static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
{
- struct net_device *dev = dev_instance;
- struct rtl8169_private *tp = netdev_priv(dev);
+ struct rtl8169_private *tp = dev_instance;
int handled = 0;
u16 status;
@@ -7477,7 +6964,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
handled = 1;
rtl_irq_disable(tp);
- napi_schedule(&tp->napi);
+ napi_schedule_irqoff(&tp->napi);
}
}
return IRQ_RETVAL(handled);
@@ -7628,7 +7115,7 @@ static int rtl8169_close(struct net_device *dev)
pm_runtime_get_sync(&pdev->dev);
/* Update counters before going down */
- rtl8169_update_counters(dev);
+ rtl8169_update_counters(tp);
rtl_lock_work(tp);
clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
@@ -7638,7 +7125,7 @@ static int rtl8169_close(struct net_device *dev)
cancel_work_sync(&tp->wk.work);
- pci_free_irq(pdev, 0, dev);
+ pci_free_irq(pdev, 0, tp);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
@@ -7683,7 +7170,7 @@ static int rtl_open(struct net_device *dev)
if (!tp->RxDescArray)
goto err_free_tx_0;
- retval = rtl8169_init_ring(dev);
+ retval = rtl8169_init_ring(tp);
if (retval < 0)
goto err_free_rx_1;
@@ -7693,7 +7180,7 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);
- retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp,
dev->name);
if (retval < 0)
goto err_release_fw_2;
@@ -7706,13 +7193,11 @@ static int rtl_open(struct net_device *dev)
rtl8169_init_phy(dev, tp);
- __rtl8169_set_features(dev, dev->features);
-
rtl_pll_power_up(tp);
- rtl_hw_start(dev);
+ rtl_hw_start(tp);
- if (!rtl8169_init_counter_offsets(dev))
+ if (!rtl8169_init_counter_offsets(tp))
netif_warn(tp, hw, dev, "counter reset/update failed\n");
netif_start_queue(dev);
@@ -7781,7 +7266,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
* from tally counters.
*/
if (pm_runtime_active(&pdev->dev))
- rtl8169_update_counters(dev);
+ rtl8169_update_counters(tp);
/*
* Subtract values fetched during initalization.
@@ -7877,7 +7362,7 @@ static int rtl8169_runtime_suspend(struct device *device)
/* Update counters before going runtime suspend */
rtl8169_rx_missed(dev);
- rtl8169_update_counters(dev);
+ rtl8169_update_counters(tp);
return 0;
}
@@ -8017,9 +7502,7 @@ static const struct net_device_ops rtl_netdev_ops = {
};
static const struct rtl_cfg_info {
- void (*hw_start)(struct net_device *);
- unsigned int region;
- unsigned int align;
+ void (*hw_start)(struct rtl8169_private *tp);
u16 event_slow;
unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
@@ -8027,8 +7510,6 @@ static const struct rtl_cfg_info {
} rtl_cfg_infos [] = {
[RTL_CFG_0] = {
.hw_start = rtl_hw_start_8169,
- .region = 1,
- .align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
.has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
@@ -8036,8 +7517,6 @@ static const struct rtl_cfg_info {
},
[RTL_CFG_1] = {
.hw_start = rtl_hw_start_8168,
- .region = 2,
- .align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
.has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
@@ -8045,8 +7524,6 @@ static const struct rtl_cfg_info {
},
[RTL_CFG_2] = {
.hw_start = rtl_hw_start_8101,
- .region = 2,
- .align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
PCSTimeout,
.coalesce_info = rtl_coalesce_info_8168_8136,
@@ -8122,20 +7599,10 @@ static void rtl_hw_init_8168ep(struct rtl8169_private *tp)
static void rtl_hw_initialize(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
rtl_hw_init_8168g(tp);
break;
- case RTL_GIGA_MAC_VER_49:
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
+ case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
rtl_hw_init_8168ep(tp);
break;
default:
@@ -8146,11 +7613,10 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
- const unsigned int region = cfg->region;
struct rtl8169_private *tp;
struct mii_if_info *mii;
struct net_device *dev;
- int chipset, i;
+ int chipset, region, i;
int rc;
if (netif_msg_drv(&debug)) {
@@ -8185,43 +7651,41 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* enable device (incl. PCI PM wakeup and hotplug setup) */
rc = pcim_enable_device(pdev);
if (rc < 0) {
- netif_err(tp, probe, dev, "enable failure\n");
+ dev_err(&pdev->dev, "enable failure\n");
return rc;
}
if (pcim_set_mwi(pdev) < 0)
- netif_info(tp, probe, dev, "Mem-Wr-Inval unavailable\n");
+ dev_info(&pdev->dev, "Mem-Wr-Inval unavailable\n");
- /* make sure PCI base addr 1 is MMIO */
- if (!(pci_resource_flags(pdev, region) & IORESOURCE_MEM)) {
- netif_err(tp, probe, dev,
- "region #%d not an MMIO resource, aborting\n",
- region);
+ /* use first MMIO region */
+ region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1;
+ if (region < 0) {
+ dev_err(&pdev->dev, "no MMIO resource found\n");
return -ENODEV;
}
/* check for weird/broken PCI region reporting */
if (pci_resource_len(pdev, region) < R8169_REGS_SIZE) {
- netif_err(tp, probe, dev,
- "Invalid PCI region size(s), aborting\n");
+ dev_err(&pdev->dev, "Invalid PCI region size(s), aborting\n");
return -ENODEV;
}
rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
if (rc < 0) {
- netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
+ dev_err(&pdev->dev, "cannot remap MMIO, aborting\n");
return rc;
}
tp->mmio_addr = pcim_iomap_table(pdev)[region];
if (!pci_is_pcie(pdev))
- netif_info(tp, probe, dev, "not PCI Express\n");
+ dev_info(&pdev->dev, "not PCI Express\n");
/* Identify chip attached to board */
- rtl8169_get_mac_version(tp, dev, cfg->default_ver);
+ rtl8169_get_mac_version(tp, cfg->default_ver);
- tp->cp_cmd = 0;
+ tp->cp_cmd = RTL_R16(tp, CPlusCmd);
if ((sizeof(dma_addr_t) > 4) &&
(use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) &&
@@ -8236,7 +7700,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
} else {
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc < 0) {
- netif_err(tp, probe, dev, "DMA configuration failed\n");
+ dev_err(&pdev->dev, "DMA configuration failed\n");
return rc;
}
}
@@ -8254,18 +7718,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_master(pdev);
rtl_init_mdio_ops(tp);
- rtl_init_pll_power_ops(tp);
rtl_init_jumbo_ops(tp);
- rtl_init_csi_ops(tp);
rtl8169_print_mac_version(tp);
chipset = tp->mac_version;
- tp->txd_version = rtl_chip_infos[chipset].txd_version;
rc = rtl_alloc_irq(tp);
if (rc < 0) {
- netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+ dev_err(&pdev->dev, "Can't allocate interrupt\n");
return rc;
}
@@ -8293,29 +7754,18 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
u64_stats_init(&tp->tx_stats.syncp);
/* Get MAC address */
- if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
- tp->mac_version == RTL_GIGA_MAC_VER_36 ||
- tp->mac_version == RTL_GIGA_MAC_VER_37 ||
- tp->mac_version == RTL_GIGA_MAC_VER_38 ||
- tp->mac_version == RTL_GIGA_MAC_VER_40 ||
- tp->mac_version == RTL_GIGA_MAC_VER_41 ||
- tp->mac_version == RTL_GIGA_MAC_VER_42 ||
- tp->mac_version == RTL_GIGA_MAC_VER_43 ||
- tp->mac_version == RTL_GIGA_MAC_VER_44 ||
- tp->mac_version == RTL_GIGA_MAC_VER_45 ||
- tp->mac_version == RTL_GIGA_MAC_VER_46 ||
- tp->mac_version == RTL_GIGA_MAC_VER_47 ||
- tp->mac_version == RTL_GIGA_MAC_VER_48 ||
- tp->mac_version == RTL_GIGA_MAC_VER_49 ||
- tp->mac_version == RTL_GIGA_MAC_VER_50 ||
- tp->mac_version == RTL_GIGA_MAC_VER_51) {
- u16 mac_addr[3];
-
+ switch (tp->mac_version) {
+ u8 mac_addr[ETH_ALEN] __aligned(4);
+ case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38:
+ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51:
*(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC);
- *(u16 *)&mac_addr[2] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
+ *(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC);
- if (is_valid_ether_addr((u8 *)mac_addr))
- rtl_rar_set(tp, (u8 *)mac_addr);
+ if (is_valid_ether_addr(mac_addr))
+ rtl_rar_set(tp, mac_addr);
+ break;
+ default:
+ break;
}
for (i = 0; i < ETH_ALEN; i++)
dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
@@ -8323,7 +7773,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->ethtool_ops = &rtl8169_ethtool_ops;
dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
- netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT);
+ netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT);
/* don't enable SG, IP_CSUM and TSO by default - it might not work
* properly for all devices */
@@ -8346,13 +7796,17 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Disallow toggling */
dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
- if (tp->txd_version == RTL_TD_0)
+ switch (rtl_chip_infos[chipset].txd_version) {
+ case RTL_TD_0:
tp->tso_csum = rtl8169_tso_csum_v1;
- else if (tp->txd_version == RTL_TD_1) {
+ break;
+ case RTL_TD_1:
tp->tso_csum = rtl8169_tso_csum_v2;
dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
- } else
+ break;
+ default:
WARN_ON_ONCE(1);
+ }
dev->hw_features |= NETIF_F_RXALL;
dev->hw_features |= NETIF_F_RXFCS;
@@ -8365,9 +7819,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->event_slow = cfg->event_slow;
tp->coalesce_info = cfg->coalesce_info;
- tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ?
- ~(RxBOVF | RxFOVF) : ~0;
-
timer_setup(&tp->timer, rtl8169_phy_timer, 0);
tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
@@ -8384,15 +7835,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc < 0)
return rc;
- netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
- rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
- (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+ netif_info(tp, probe, dev, "%s, %pM, XID %08x, IRQ %d\n",
+ rtl_chip_infos[chipset].name, dev->dev_addr,
+ (u32)(RTL_R32(tp, TxConfig) & 0xfcf0f8ff),
pci_irq_vector(pdev, 0));
if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
"tx checksumming: %s]\n",
rtl_chip_infos[chipset].jumbo_max,
- rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
+ tp->mac_version <= RTL_GIGA_MAC_VER_06 ? "ok" : "ko");
}
if (r8168_check_dash(tp))
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index b6b90a6314e3..5970d9e5ddf1 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -442,12 +442,22 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear,
static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
int enum_index)
{
- iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]);
+ u16 offset = mdp->reg_offset[enum_index];
+
+ if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
+ return;
+
+ iowrite32(data, mdp->tsu_addr + offset);
}
static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
{
- return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
+ u16 offset = mdp->reg_offset[enum_index];
+
+ if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
+ return ~0U;
+
+ return ioread32(mdp->tsu_addr + offset);
}
static void sh_eth_select_mii(struct net_device *ndev)
@@ -2610,12 +2620,6 @@ static int sh_eth_change_mtu(struct net_device *ndev, int new_mtu)
}
/* For TSU_POSTn. Please refer to the manual about this (strange) bitfields */
-static void *sh_eth_tsu_get_post_reg_offset(struct sh_eth_private *mdp,
- int entry)
-{
- return sh_eth_tsu_get_offset(mdp, TSU_POST1) + (entry / 8 * 4);
-}
-
static u32 sh_eth_tsu_get_post_mask(int entry)
{
return 0x0f << (28 - ((entry % 8) * 4));
@@ -2630,27 +2634,25 @@ static void sh_eth_tsu_enable_cam_entry_post(struct net_device *ndev,
int entry)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
+ int reg = TSU_POST1 + entry / 8;
u32 tmp;
- void *reg_offset;
- reg_offset = sh_eth_tsu_get_post_reg_offset(mdp, entry);
- tmp = ioread32(reg_offset);
- iowrite32(tmp | sh_eth_tsu_get_post_bit(mdp, entry), reg_offset);
+ tmp = sh_eth_tsu_read(mdp, reg);
+ sh_eth_tsu_write(mdp, tmp | sh_eth_tsu_get_post_bit(mdp, entry), reg);
}
static bool sh_eth_tsu_disable_cam_entry_post(struct net_device *ndev,
int entry)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
+ int reg = TSU_POST1 + entry / 8;
u32 post_mask, ref_mask, tmp;
- void *reg_offset;
- reg_offset = sh_eth_tsu_get_post_reg_offset(mdp, entry);
post_mask = sh_eth_tsu_get_post_mask(entry);
ref_mask = sh_eth_tsu_get_post_bit(mdp, entry) & ~post_mask;
- tmp = ioread32(reg_offset);
- iowrite32(tmp & ~post_mask, reg_offset);
+ tmp = sh_eth_tsu_read(mdp, reg);
+ sh_eth_tsu_write(mdp, tmp & ~post_mask, reg);
/* If other port enables, the function returns "true" */
return tmp & ref_mask;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 056cb6093630..152d6948611c 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2783,6 +2783,8 @@ static int rocker_switchdev_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
+ break;
memcpy(&switchdev_work->fdb_info, ptr,
sizeof(switchdev_work->fdb_info));
switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index a4ebd8715494..88c1eee677c2 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1551,6 +1551,38 @@ static int efx_probe_interrupts(struct efx_nic *efx)
return 0;
}
+#if defined(CONFIG_SMP)
+static void efx_set_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ unsigned int cpu;
+
+ efx_for_each_channel(channel, efx) {
+ cpu = cpumask_local_spread(channel->channel,
+ pcibus_to_node(efx->pci_dev->bus));
+ irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
+ }
+}
+
+static void efx_clear_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ irq_set_affinity_hint(channel->irq, NULL);
+}
+#else
+static void
+efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+
+static void
+efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+#endif /* CONFIG_SMP */
+
static int efx_soft_enable_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel, *end_channel;
@@ -3308,6 +3340,7 @@ static void efx_pci_remove_main(struct efx_nic *efx)
cancel_work_sync(&efx->reset_work);
efx_disable_interrupts(efx);
+ efx_clear_interrupt_affinity(efx);
efx_nic_fini_interrupt(efx);
efx_fini_port(efx);
efx->type->fini(efx);
@@ -3457,6 +3490,8 @@ static int efx_pci_probe_main(struct efx_nic *efx)
rc = efx_nic_init_interrupt(efx);
if (rc)
goto fail5;
+
+ efx_set_interrupt_affinity(efx);
rc = efx_enable_interrupts(efx);
if (rc)
goto fail6;
@@ -3464,6 +3499,7 @@ static int efx_pci_probe_main(struct efx_nic *efx)
return 0;
fail6:
+ efx_clear_interrupt_affinity(efx);
efx_nic_fini_interrupt(efx);
fail5:
efx_fini_port(efx);
diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig
index 6bcfe27fc560..b80048ca82a0 100644
--- a/drivers/net/ethernet/socionext/Kconfig
+++ b/drivers/net/ethernet/socionext/Kconfig
@@ -14,6 +14,8 @@ if NET_VENDOR_SOCIONEXT
config SNI_AVE
tristate "Socionext AVE ethernet support"
depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+ depends on HAS_IOMEM
+ select MFD_SYSCON
select PHYLIB
---help---
Driver for gigabit ethernet MACs, called AVE, in the
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f4c0b02ddad8..aa50331b7607 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1057,7 +1057,8 @@ static int netsec_netdev_load_microcode(struct netsec_priv *priv)
return 0;
}
-static int netsec_reset_hardware(struct netsec_priv *priv)
+static int netsec_reset_hardware(struct netsec_priv *priv,
+ bool load_ucode)
{
u32 value;
int err;
@@ -1102,11 +1103,14 @@ static int netsec_reset_hardware(struct netsec_priv *priv)
netsec_write(priv, NETSEC_REG_NRM_RX_CONFIG,
1 << NETSEC_REG_DESC_ENDIAN);
- err = netsec_netdev_load_microcode(priv);
- if (err) {
- netif_err(priv, probe, priv->ndev,
- "%s: failed to load microcode (%d)\n", __func__, err);
- return err;
+ if (load_ucode) {
+ err = netsec_netdev_load_microcode(priv);
+ if (err) {
+ netif_err(priv, probe, priv->ndev,
+ "%s: failed to load microcode (%d)\n",
+ __func__, err);
+ return err;
+ }
}
/* start DMA engines */
@@ -1313,8 +1317,8 @@ static int netsec_netdev_open(struct net_device *ndev)
napi_enable(&priv->napi);
netif_start_queue(ndev);
- /* Enable RX intr. */
- netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX);
+ /* Enable TX+RX intr. */
+ netsec_write(priv, NETSEC_REG_INTEN_SET, NETSEC_IRQ_RX | NETSEC_IRQ_TX);
return 0;
err3:
@@ -1328,6 +1332,7 @@ err1:
static int netsec_netdev_stop(struct net_device *ndev)
{
+ int ret;
struct netsec_priv *priv = netdev_priv(ndev);
netif_stop_queue(priv->ndev);
@@ -1343,12 +1348,14 @@ static int netsec_netdev_stop(struct net_device *ndev)
netsec_uninit_pkt_dring(priv, NETSEC_RING_TX);
netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
+ ret = netsec_reset_hardware(priv, false);
+
phy_stop(ndev->phydev);
phy_disconnect(ndev->phydev);
pm_runtime_put_sync(priv->dev);
- return 0;
+ return ret;
}
static int netsec_netdev_init(struct net_device *ndev)
@@ -1364,7 +1371,7 @@ static int netsec_netdev_init(struct net_device *ndev)
if (ret)
goto err1;
- ret = netsec_reset_hardware(priv);
+ ret = netsec_reset_hardware(priv, true);
if (ret)
goto err2;
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 0b3b7a460641..f7ecceeb1e28 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
#include <linux/mii.h>
#include <linux/module.h>
#include <linux/netdevice.h>
@@ -18,6 +19,7 @@
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
#include <linux/phy.h>
+#include <linux/regmap.h>
#include <linux/reset.h>
#include <linux/types.h>
#include <linux/u64_stats_sync.h>
@@ -197,8 +199,16 @@
#define AVE_INTM_COUNT 20
#define AVE_FORCE_TXINTCNT 1
+/* SG */
+#define SG_ETPINMODE 0x540
+#define SG_ETPINMODE_EXTPHY BIT(1) /* for LD11 */
+#define SG_ETPINMODE_RMII(ins) BIT(ins)
+
#define IS_DESC_64BIT(p) ((p)->data->is_desc_64bit)
+#define AVE_MAX_CLKS 4
+#define AVE_MAX_RSTS 2
+
enum desc_id {
AVE_DESCID_RX,
AVE_DESCID_TX,
@@ -225,10 +235,6 @@ struct ave_desc_info {
struct ave_desc *desc; /* skb info related descriptor */
};
-struct ave_soc_data {
- bool is_desc_64bit;
-};
-
struct ave_stats {
struct u64_stats_sync syncp;
u64 packets;
@@ -245,11 +251,16 @@ struct ave_private {
int phy_id;
unsigned int desc_size;
u32 msg_enable;
- struct clk *clk;
- struct reset_control *rst;
+ int nclks;
+ struct clk *clk[AVE_MAX_CLKS];
+ int nrsts;
+ struct reset_control *rst[AVE_MAX_RSTS];
phy_interface_t phy_mode;
struct phy_device *phydev;
struct mii_bus *mdio;
+ struct regmap *regmap;
+ unsigned int pinmode_mask;
+ unsigned int pinmode_val;
/* stats */
struct ave_stats stats_rx;
@@ -272,6 +283,14 @@ struct ave_private {
const struct ave_soc_data *data;
};
+struct ave_soc_data {
+ bool is_desc_64bit;
+ const char *clock_names[AVE_MAX_CLKS];
+ const char *reset_names[AVE_MAX_RSTS];
+ int (*get_pinmode)(struct ave_private *priv,
+ phy_interface_t phy_mode, u32 arg);
+};
+
static u32 ave_desc_read(struct net_device *ndev, enum desc_id id, int entry,
int offset)
{
@@ -1153,20 +1172,30 @@ static int ave_init(struct net_device *ndev)
struct device_node *np = dev->of_node;
struct device_node *mdio_np;
struct phy_device *phydev;
- int ret;
+ int nc, nr, ret;
/* enable clk because of hw access until ndo_open */
- ret = clk_prepare_enable(priv->clk);
- if (ret) {
- dev_err(dev, "can't enable clock\n");
- return ret;
+ for (nc = 0; nc < priv->nclks; nc++) {
+ ret = clk_prepare_enable(priv->clk[nc]);
+ if (ret) {
+ dev_err(dev, "can't enable clock\n");
+ goto out_clk_disable;
+ }
}
- ret = reset_control_deassert(priv->rst);
- if (ret) {
- dev_err(dev, "can't deassert reset\n");
- goto out_clk_disable;
+
+ for (nr = 0; nr < priv->nrsts; nr++) {
+ ret = reset_control_deassert(priv->rst[nr]);
+ if (ret) {
+ dev_err(dev, "can't deassert reset\n");
+ goto out_reset_assert;
+ }
}
+ ret = regmap_update_bits(priv->regmap, SG_ETPINMODE,
+ priv->pinmode_mask, priv->pinmode_val);
+ if (ret)
+ return ret;
+
ave_global_reset(ndev);
mdio_np = of_get_child_by_name(np, "mdio");
@@ -1207,9 +1236,11 @@ static int ave_init(struct net_device *ndev)
out_mdio_unregister:
mdiobus_unregister(priv->mdio);
out_reset_assert:
- reset_control_assert(priv->rst);
+ while (--nr >= 0)
+ reset_control_assert(priv->rst[nr]);
out_clk_disable:
- clk_disable_unprepare(priv->clk);
+ while (--nc >= 0)
+ clk_disable_unprepare(priv->clk[nc]);
return ret;
}
@@ -1217,13 +1248,16 @@ out_clk_disable:
static void ave_uninit(struct net_device *ndev)
{
struct ave_private *priv = netdev_priv(ndev);
+ int i;
phy_disconnect(priv->phydev);
mdiobus_unregister(priv->mdio);
/* disable clk because of hw access after ndo_stop */
- reset_control_assert(priv->rst);
- clk_disable_unprepare(priv->clk);
+ for (i = 0; i < priv->nrsts; i++)
+ reset_control_assert(priv->rst[i]);
+ for (i = 0; i < priv->nclks; i++)
+ clk_disable_unprepare(priv->clk[i]);
}
static int ave_open(struct net_device *ndev)
@@ -1520,6 +1554,7 @@ static int ave_probe(struct platform_device *pdev)
const struct ave_soc_data *data;
struct device *dev = &pdev->dev;
char buf[ETHTOOL_FWVERS_LEN];
+ struct of_phandle_args args;
phy_interface_t phy_mode;
struct ave_private *priv;
struct net_device *ndev;
@@ -1527,8 +1562,9 @@ static int ave_probe(struct platform_device *pdev)
struct resource *res;
const void *mac_addr;
void __iomem *base;
+ const char *name;
+ int i, irq, ret;
u64 dma_mask;
- int irq, ret;
u32 ave_id;
data = of_device_get_match_data(dev);
@@ -1541,12 +1577,6 @@ static int ave_probe(struct platform_device *pdev)
dev_err(dev, "phy-mode not found\n");
return -EINVAL;
}
- if ((!phy_interface_mode_is_rgmii(phy_mode)) &&
- phy_mode != PHY_INTERFACE_MODE_RMII &&
- phy_mode != PHY_INTERFACE_MODE_MII) {
- dev_err(dev, "phy-mode is invalid\n");
- return -EINVAL;
- }
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
@@ -1614,15 +1644,47 @@ static int ave_probe(struct platform_device *pdev)
u64_stats_init(&priv->stats_tx.syncp);
u64_stats_init(&priv->stats_rx.syncp);
- priv->clk = devm_clk_get(dev, NULL);
- if (IS_ERR(priv->clk)) {
- ret = PTR_ERR(priv->clk);
- goto out_free_netdev;
+ for (i = 0; i < AVE_MAX_CLKS; i++) {
+ name = priv->data->clock_names[i];
+ if (!name)
+ break;
+ priv->clk[i] = devm_clk_get(dev, name);
+ if (IS_ERR(priv->clk[i])) {
+ ret = PTR_ERR(priv->clk[i]);
+ goto out_free_netdev;
+ }
+ priv->nclks++;
+ }
+
+ for (i = 0; i < AVE_MAX_RSTS; i++) {
+ name = priv->data->reset_names[i];
+ if (!name)
+ break;
+ priv->rst[i] = devm_reset_control_get_shared(dev, name);
+ if (IS_ERR(priv->rst[i])) {
+ ret = PTR_ERR(priv->rst[i]);
+ goto out_free_netdev;
+ }
+ priv->nrsts++;
}
- priv->rst = devm_reset_control_get_optional_shared(dev, NULL);
- if (IS_ERR(priv->rst)) {
- ret = PTR_ERR(priv->rst);
+ ret = of_parse_phandle_with_fixed_args(np,
+ "socionext,syscon-phy-mode",
+ 1, 0, &args);
+ if (ret) {
+ netdev_err(ndev, "can't get syscon-phy-mode property\n");
+ goto out_free_netdev;
+ }
+ priv->regmap = syscon_node_to_regmap(args.np);
+ of_node_put(args.np);
+ if (IS_ERR(priv->regmap)) {
+ netdev_err(ndev, "can't map syscon-phy-mode\n");
+ ret = PTR_ERR(priv->regmap);
+ goto out_free_netdev;
+ }
+ ret = priv->data->get_pinmode(priv, phy_mode, args.args[0]);
+ if (ret) {
+ netdev_err(ndev, "invalid phy-mode setting\n");
goto out_free_netdev;
}
@@ -1685,24 +1747,148 @@ static int ave_remove(struct platform_device *pdev)
return 0;
}
+static int ave_pro4_get_pinmode(struct ave_private *priv,
+ phy_interface_t phy_mode, u32 arg)
+{
+ if (arg > 0)
+ return -EINVAL;
+
+ priv->pinmode_mask = SG_ETPINMODE_RMII(0);
+
+ switch (phy_mode) {
+ case PHY_INTERFACE_MODE_RMII:
+ priv->pinmode_val = SG_ETPINMODE_RMII(0);
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ case PHY_INTERFACE_MODE_RGMII:
+ priv->pinmode_val = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ave_ld11_get_pinmode(struct ave_private *priv,
+ phy_interface_t phy_mode, u32 arg)
+{
+ if (arg > 0)
+ return -EINVAL;
+
+ priv->pinmode_mask = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0);
+
+ switch (phy_mode) {
+ case PHY_INTERFACE_MODE_INTERNAL:
+ priv->pinmode_val = 0;
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ priv->pinmode_val = SG_ETPINMODE_EXTPHY | SG_ETPINMODE_RMII(0);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ave_ld20_get_pinmode(struct ave_private *priv,
+ phy_interface_t phy_mode, u32 arg)
+{
+ if (arg > 0)
+ return -EINVAL;
+
+ priv->pinmode_mask = SG_ETPINMODE_RMII(0);
+
+ switch (phy_mode) {
+ case PHY_INTERFACE_MODE_RMII:
+ priv->pinmode_val = SG_ETPINMODE_RMII(0);
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
+ priv->pinmode_val = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ave_pxs3_get_pinmode(struct ave_private *priv,
+ phy_interface_t phy_mode, u32 arg)
+{
+ if (arg > 1)
+ return -EINVAL;
+
+ priv->pinmode_mask = SG_ETPINMODE_RMII(arg);
+
+ switch (phy_mode) {
+ case PHY_INTERFACE_MODE_RMII:
+ priv->pinmode_val = SG_ETPINMODE_RMII(arg);
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
+ priv->pinmode_val = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct ave_soc_data ave_pro4_data = {
.is_desc_64bit = false,
+ .clock_names = {
+ "gio", "ether", "ether-gb", "ether-phy",
+ },
+ .reset_names = {
+ "gio", "ether",
+ },
+ .get_pinmode = ave_pro4_get_pinmode,
};
static const struct ave_soc_data ave_pxs2_data = {
.is_desc_64bit = false,
+ .clock_names = {
+ "ether",
+ },
+ .reset_names = {
+ "ether",
+ },
+ .get_pinmode = ave_pro4_get_pinmode,
};
static const struct ave_soc_data ave_ld11_data = {
.is_desc_64bit = false,
+ .clock_names = {
+ "ether",
+ },
+ .reset_names = {
+ "ether",
+ },
+ .get_pinmode = ave_ld11_get_pinmode,
};
static const struct ave_soc_data ave_ld20_data = {
.is_desc_64bit = true,
+ .clock_names = {
+ "ether",
+ },
+ .reset_names = {
+ "ether",
+ },
+ .get_pinmode = ave_ld20_get_pinmode,
};
static const struct ave_soc_data ave_pxs3_data = {
.is_desc_64bit = false,
+ .clock_names = {
+ "ether",
+ },
+ .reset_names = {
+ "ether",
+ },
+ .get_pinmode = ave_pxs3_get_pinmode,
};
static const struct of_device_id of_ave_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 972e4ef6d414..e3b578b4f7cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -4,7 +4,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \
chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \
dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \
mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \
- dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o $(stmmac-y)
+ dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \
+ $(stmmac-y)
# Ordering matters. Generic driver must be last.
obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index e93c40b4631e..b9c9003060c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -24,7 +24,7 @@
#include "stmmac.h"
-static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
{
struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
unsigned int nopaged_len = skb_headlen(skb);
@@ -51,8 +51,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
tx_q->tx_skbuff_dma[entry].buf = des2;
tx_q->tx_skbuff_dma[entry].len = bmax;
/* do not close the descriptor and do not set own bit */
- priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
- 0, false, skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
+ 0, false, skb->len);
while (len != 0) {
tx_q->tx_skbuff[entry] = NULL;
@@ -68,9 +68,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
return -1;
tx_q->tx_skbuff_dma[entry].buf = des2;
tx_q->tx_skbuff_dma[entry].len = bmax;
- priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
- STMMAC_CHAIN_MODE, 1,
- false, skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 0, bmax, csum,
+ STMMAC_CHAIN_MODE, 1, false, skb->len);
len -= bmax;
i++;
} else {
@@ -83,9 +82,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
tx_q->tx_skbuff_dma[entry].buf = des2;
tx_q->tx_skbuff_dma[entry].len = len;
/* last descriptor can be set now */
- priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
- STMMAC_CHAIN_MODE, 1,
- true, skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 0, len, csum,
+ STMMAC_CHAIN_MODE, 1, true, skb->len);
len = 0;
}
}
@@ -95,7 +93,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
return entry;
}
-static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
+static unsigned int is_jumbo_frm(int len, int enh_desc)
{
unsigned int ret = 0;
@@ -107,7 +105,7 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
return ret;
}
-static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
+static void init_dma_chain(void *des, dma_addr_t phy_addr,
unsigned int size, unsigned int extend_desc)
{
/*
@@ -137,7 +135,7 @@ static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
}
}
-static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
+static void refill_desc3(void *priv_ptr, struct dma_desc *p)
{
struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)priv_ptr;
struct stmmac_priv *priv = rx_q->priv_data;
@@ -153,7 +151,7 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
sizeof(struct dma_desc)));
}
-static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
+static void clean_desc3(void *priv_ptr, struct dma_desc *p)
{
struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
struct stmmac_priv *priv = tx_q->priv_data;
@@ -171,9 +169,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
}
const struct stmmac_mode_ops chain_mode_ops = {
- .init = stmmac_init_dma_chain,
- .is_jumbo_frm = stmmac_is_jumbo_frm,
- .jumbo_frm = stmmac_jumbo_frm,
- .refill_desc3 = stmmac_refill_desc3,
- .clean_desc3 = stmmac_clean_desc3,
+ .init = init_dma_chain,
+ .is_jumbo_frm = is_jumbo_frm,
+ .jumbo_frm = jumbo_frm,
+ .refill_desc3 = refill_desc3,
+ .clean_desc3 = clean_desc3,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index ad2388aee463..627e905b6d76 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -32,12 +32,14 @@
#endif
#include "descs.h"
+#include "hwif.h"
#include "mmc.h"
/* Synopsys Core versions */
#define DWMAC_CORE_3_40 0x34
#define DWMAC_CORE_3_50 0x35
#define DWMAC_CORE_4_00 0x40
+#define DWMAC_CORE_4_10 0x41
#define DWMAC_CORE_5_00 0x50
#define DWMAC_CORE_5_10 0x51
#define STMMAC_CHAN0 0 /* Always supported and default for all chips */
@@ -377,197 +379,11 @@ struct dma_features {
#define JUMBO_LEN 9000
-/* Descriptors helpers */
-struct stmmac_desc_ops {
- /* DMA RX descriptor ring initialization */
- void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode,
- int end);
- /* DMA TX descriptor ring initialization */
- void (*init_tx_desc) (struct dma_desc *p, int mode, int end);
-
- /* Invoked by the xmit function to prepare the tx descriptor */
- void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len,
- bool csum_flag, int mode, bool tx_own,
- bool ls, unsigned int tot_pkt_len);
- void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1,
- int len2, bool tx_own, bool ls,
- unsigned int tcphdrlen,
- unsigned int tcppayloadlen);
- /* Set/get the owner of the descriptor */
- void (*set_tx_owner) (struct dma_desc *p);
- int (*get_tx_owner) (struct dma_desc *p);
- /* Clean the tx descriptor as soon as the tx irq is received */
- void (*release_tx_desc) (struct dma_desc *p, int mode);
- /* Clear interrupt on tx frame completion. When this bit is
- * set an interrupt happens as soon as the frame is transmitted */
- void (*set_tx_ic)(struct dma_desc *p);
- /* Last tx segment reports the transmit status */
- int (*get_tx_ls) (struct dma_desc *p);
- /* Return the transmit status looking at the TDES1 */
- int (*tx_status) (void *data, struct stmmac_extra_stats *x,
- struct dma_desc *p, void __iomem *ioaddr);
- /* Get the buffer size from the descriptor */
- int (*get_tx_len) (struct dma_desc *p);
- /* Handle extra events on specific interrupts hw dependent */
- void (*set_rx_owner) (struct dma_desc *p);
- /* Get the receive frame size */
- int (*get_rx_frame_len) (struct dma_desc *p, int rx_coe_type);
- /* Return the reception status looking at the RDES1 */
- int (*rx_status) (void *data, struct stmmac_extra_stats *x,
- struct dma_desc *p);
- void (*rx_extended_status) (void *data, struct stmmac_extra_stats *x,
- struct dma_extended_desc *p);
- /* Set tx timestamp enable bit */
- void (*enable_tx_timestamp) (struct dma_desc *p);
- /* get tx timestamp status */
- int (*get_tx_timestamp_status) (struct dma_desc *p);
- /* get timestamp value */
- u64(*get_timestamp) (void *desc, u32 ats);
- /* get rx timestamp status */
- int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
- /* Display ring */
- void (*display_ring)(void *head, unsigned int size, bool rx);
- /* set MSS via context descriptor */
- void (*set_mss)(struct dma_desc *p, unsigned int mss);
-};
-
extern const struct stmmac_desc_ops enh_desc_ops;
extern const struct stmmac_desc_ops ndesc_ops;
-/* Specific DMA helpers */
-struct stmmac_dma_ops {
- /* DMA core initialization */
- int (*reset)(void __iomem *ioaddr);
- void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
- u32 dma_tx, u32 dma_rx, int atds);
- void (*init_chan)(void __iomem *ioaddr,
- struct stmmac_dma_cfg *dma_cfg, u32 chan);
- void (*init_rx_chan)(void __iomem *ioaddr,
- struct stmmac_dma_cfg *dma_cfg,
- u32 dma_rx_phy, u32 chan);
- void (*init_tx_chan)(void __iomem *ioaddr,
- struct stmmac_dma_cfg *dma_cfg,
- u32 dma_tx_phy, u32 chan);
- /* Configure the AXI Bus Mode Register */
- void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
- /* Dump DMA registers */
- void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space);
- /* Set tx/rx threshold in the csr6 register
- * An invalid value enables the store-and-forward mode */
- void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
- int rxfifosz);
- void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
- int fifosz, u8 qmode);
- void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
- int fifosz, u8 qmode);
- /* To track extra statistic (if supported) */
- void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
- void __iomem *ioaddr);
- void (*enable_dma_transmission) (void __iomem *ioaddr);
- void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
- void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
- void (*start_tx)(void __iomem *ioaddr, u32 chan);
- void (*stop_tx)(void __iomem *ioaddr, u32 chan);
- void (*start_rx)(void __iomem *ioaddr, u32 chan);
- void (*stop_rx)(void __iomem *ioaddr, u32 chan);
- int (*dma_interrupt) (void __iomem *ioaddr,
- struct stmmac_extra_stats *x, u32 chan);
- /* If supported then get the optional core features */
- void (*get_hw_feature)(void __iomem *ioaddr,
- struct dma_features *dma_cap);
- /* Program the HW RX Watchdog */
- void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
- void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
- void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
- void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
- void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
- void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
-};
-
struct mac_device_info;
-/* Helpers to program the MAC core */
-struct stmmac_ops {
- /* MAC core initialization */
- void (*core_init)(struct mac_device_info *hw, struct net_device *dev);
- /* Enable the MAC RX/TX */
- void (*set_mac)(void __iomem *ioaddr, bool enable);
- /* Enable and verify that the IPC module is supported */
- int (*rx_ipc)(struct mac_device_info *hw);
- /* Enable RX Queues */
- void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue);
- /* RX Queues Priority */
- void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
- /* TX Queues Priority */
- void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
- /* RX Queues Routing */
- void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet,
- u32 queue);
- /* Program RX Algorithms */
- void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg);
- /* Program TX Algorithms */
- void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
- /* Set MTL TX queues weight */
- void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
- u32 weight, u32 queue);
- /* RX MTL queue to RX dma mapping */
- void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
- /* Configure AV Algorithm */
- void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
- u32 idle_slope, u32 high_credit, u32 low_credit,
- u32 queue);
- /* Dump MAC registers */
- void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
- /* Handle extra events on specific interrupts hw dependent */
- int (*host_irq_status)(struct mac_device_info *hw,
- struct stmmac_extra_stats *x);
- /* Handle MTL interrupts */
- int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
- /* Multicast filter setting */
- void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
- /* Flow control setting */
- void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
- unsigned int fc, unsigned int pause_time, u32 tx_cnt);
- /* Set power management mode (e.g. magic frame) */
- void (*pmt)(struct mac_device_info *hw, unsigned long mode);
- /* Set/Get Unicast MAC addresses */
- void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
- unsigned int reg_n);
- void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
- unsigned int reg_n);
- void (*set_eee_mode)(struct mac_device_info *hw,
- bool en_tx_lpi_clockgating);
- void (*reset_eee_mode)(struct mac_device_info *hw);
- void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
- void (*set_eee_pls)(struct mac_device_info *hw, int link);
- void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
- u32 rx_queues, u32 tx_queues);
- /* PCS calls */
- void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
- bool loopback);
- void (*pcs_rane)(void __iomem *ioaddr, bool restart);
- void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv);
- /* Safety Features */
- int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp);
- bool (*safety_feat_irq_status)(struct net_device *ndev,
- void __iomem *ioaddr, unsigned int asp,
- struct stmmac_safety_stats *stats);
- const char *(*safety_feat_dump)(struct stmmac_safety_stats *stats,
- int index, unsigned long *count);
-};
-
-/* PTP and HW Timer helpers */
-struct stmmac_hwtimestamp {
- void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
- u32 (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock,
- int gmac4);
- int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec);
- int (*config_addend) (void __iomem *ioaddr, u32 addend);
- int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
- int add_sub, int gmac4);
- u64(*get_systime) (void __iomem *ioaddr);
-};
-
extern const struct stmmac_hwtimestamp stmmac_ptp;
extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
@@ -590,18 +406,6 @@ struct mii_regs {
unsigned int clk_csr_mask;
};
-/* Helpers to manage the descriptors for chain and ring modes */
-struct stmmac_mode_ops {
- void (*init) (void *des, dma_addr_t phy_addr, unsigned int size,
- unsigned int extend_desc);
- unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
- int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum);
- int (*set_16kib_bfsize)(int mtu);
- void (*init_desc3)(struct dma_desc *p);
- void (*refill_desc3) (void *priv, struct dma_desc *p);
- void (*clean_desc3) (void *priv, struct dma_desc *p);
-};
-
struct mac_device_info {
const struct stmmac_ops *mac;
const struct stmmac_desc_ops *desc;
@@ -625,12 +429,9 @@ struct stmmac_rx_routing {
u32 reg_shift;
};
-struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
- int perfect_uc_entries,
- int *synopsys_id);
-struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id);
-struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
- int perfect_uc_entries, int *synopsys_id);
+int dwmac100_setup(struct stmmac_priv *priv);
+int dwmac1000_setup(struct stmmac_priv *priv);
+int dwmac4_setup(struct stmmac_priv *priv);
void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6],
unsigned int high, unsigned int low);
@@ -650,24 +451,4 @@ extern const struct stmmac_mode_ops ring_mode_ops;
extern const struct stmmac_mode_ops chain_mode_ops;
extern const struct stmmac_desc_ops dwmac4_desc_ops;
-/**
- * stmmac_get_synopsys_id - return the SYINID.
- * @priv: driver private structure
- * Description: this simple function is to decode and return the SYINID
- * starting from the HW core register.
- */
-static inline u32 stmmac_get_synopsys_id(u32 hwid)
-{
- /* Check Synopsys Id (not available on old chips) */
- if (likely(hwid)) {
- u32 uid = ((hwid & 0x0000ff00) >> 8);
- u32 synid = (hwid & 0x000000ff);
-
- pr_info("stmmac - user ID: 0x%x, Synopsys ID: 0x%x\n",
- uid, synid);
-
- return synid;
- }
- return 0;
-}
#endif /* __COMMON_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 7cb794094a70..4ff231df7322 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/of_net.h>
#include <linux/mfd/syscon.h>
#include <linux/platform_device.h>
@@ -29,6 +30,10 @@
#define PRG_ETH0_RGMII_MODE BIT(0)
+#define PRG_ETH0_EXT_PHY_MODE_MASK GENMASK(2, 0)
+#define PRG_ETH0_EXT_RGMII_MODE 1
+#define PRG_ETH0_EXT_RMII_MODE 4
+
/* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */
#define PRG_ETH0_CLK_M250_SEL_SHIFT 4
#define PRG_ETH0_CLK_M250_SEL_MASK GENMASK(4, 4)
@@ -47,12 +52,20 @@
#define MUX_CLK_NUM_PARENTS 2
+struct meson8b_dwmac;
+
+struct meson8b_dwmac_data {
+ int (*set_phy_mode)(struct meson8b_dwmac *dwmac);
+};
+
struct meson8b_dwmac {
- struct device *dev;
- void __iomem *regs;
- phy_interface_t phy_mode;
- struct clk *rgmii_tx_clk;
- u32 tx_delay_ns;
+ struct device *dev;
+ void __iomem *regs;
+
+ const struct meson8b_dwmac_data *data;
+ phy_interface_t phy_mode;
+ struct clk *rgmii_tx_clk;
+ u32 tx_delay_ns;
};
struct meson8b_dwmac_clk_configs {
@@ -171,6 +184,59 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
return 0;
}
+static int meson8b_set_phy_mode(struct meson8b_dwmac *dwmac)
+{
+ switch (dwmac->phy_mode) {
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ /* enable RGMII mode */
+ meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+ PRG_ETH0_RGMII_MODE,
+ PRG_ETH0_RGMII_MODE);
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ /* disable RGMII mode -> enables RMII mode */
+ meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+ PRG_ETH0_RGMII_MODE, 0);
+ break;
+ default:
+ dev_err(dwmac->dev, "fail to set phy-mode %s\n",
+ phy_modes(dwmac->phy_mode));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac)
+{
+ switch (dwmac->phy_mode) {
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ /* enable RGMII mode */
+ meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+ PRG_ETH0_EXT_PHY_MODE_MASK,
+ PRG_ETH0_EXT_RGMII_MODE);
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ /* disable RGMII mode -> enables RMII mode */
+ meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
+ PRG_ETH0_EXT_PHY_MODE_MASK,
+ PRG_ETH0_EXT_RMII_MODE);
+ break;
+ default:
+ dev_err(dwmac->dev, "fail to set phy-mode %s\n",
+ phy_modes(dwmac->phy_mode));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
{
int ret;
@@ -188,10 +254,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_TXID:
- /* enable RGMII mode */
- meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE,
- PRG_ETH0_RGMII_MODE);
-
/* only relevant for RMII mode -> disable in RGMII mode */
meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
PRG_ETH0_INVERTED_RMII_CLK, 0);
@@ -224,10 +286,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
break;
case PHY_INTERFACE_MODE_RMII:
- /* disable RGMII mode -> enables RMII mode */
- meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE,
- 0);
-
/* invert internal clk_rmii_i to generate 25/2.5 tx_rx_clk */
meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
PRG_ETH0_INVERTED_RMII_CLK,
@@ -274,6 +332,11 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
goto err_remove_config_dt;
}
+ dwmac->data = (const struct meson8b_dwmac_data *)
+ of_device_get_match_data(&pdev->dev);
+ if (!dwmac->data)
+ return -EINVAL;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
dwmac->regs = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(dwmac->regs)) {
@@ -298,6 +361,10 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
if (ret)
goto err_remove_config_dt;
+ ret = dwmac->data->set_phy_mode(dwmac);
+ if (ret)
+ goto err_remove_config_dt;
+
ret = meson8b_init_prg_eth(dwmac);
if (ret)
goto err_remove_config_dt;
@@ -316,10 +383,31 @@ err_remove_config_dt:
return ret;
}
+static const struct meson8b_dwmac_data meson8b_dwmac_data = {
+ .set_phy_mode = meson8b_set_phy_mode,
+};
+
+static const struct meson8b_dwmac_data meson_axg_dwmac_data = {
+ .set_phy_mode = meson_axg_set_phy_mode,
+};
+
static const struct of_device_id meson8b_dwmac_match[] = {
- { .compatible = "amlogic,meson8b-dwmac" },
- { .compatible = "amlogic,meson8m2-dwmac" },
- { .compatible = "amlogic,meson-gxbb-dwmac" },
+ {
+ .compatible = "amlogic,meson8b-dwmac",
+ .data = &meson8b_dwmac_data,
+ },
+ {
+ .compatible = "amlogic,meson8m2-dwmac",
+ .data = &meson8b_dwmac_data,
+ },
+ {
+ .compatible = "amlogic,meson-gxbb-dwmac",
+ .data = &meson8b_dwmac_data,
+ },
+ {
+ .compatible = "amlogic,meson-axg-dwmac",
+ .data = &meson_axg_dwmac_data,
+ },
{ }
};
MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index c02d36629c52..184ca13c8f79 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -29,7 +29,6 @@
#define GMAC_MII_DATA 0x00000014 /* MII Data */
#define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */
#define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */
-#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */
#define GMAC_DEBUG 0x00000024 /* GMAC debug register */
#define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index ef10baf14186..0877bde6e860 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -27,6 +27,7 @@
#include <linux/ethtool.h>
#include <net/dsa.h>
#include <asm/io.h>
+#include "stmmac.h"
#include "stmmac_pcs.h"
#include "dwmac1000.h"
@@ -498,7 +499,7 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
x->mac_gmii_rx_proto_engine++;
}
-static const struct stmmac_ops dwmac1000_ops = {
+const struct stmmac_ops dwmac1000_ops = {
.core_init = dwmac1000_core_init,
.set_mac = stmmac_set_mac,
.rx_ipc = dwmac1000_rx_ipc_enable,
@@ -519,28 +520,21 @@ static const struct stmmac_ops dwmac1000_ops = {
.pcs_get_adv_lp = dwmac1000_get_adv_lp,
};
-struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
- int perfect_uc_entries,
- int *synopsys_id)
+int dwmac1000_setup(struct stmmac_priv *priv)
{
- struct mac_device_info *mac;
- u32 hwid = readl(ioaddr + GMAC_VERSION);
+ struct mac_device_info *mac = priv->hw;
- mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL);
- if (!mac)
- return NULL;
+ dev_info(priv->device, "\tDWMAC1000\n");
- mac->pcsr = ioaddr;
- mac->multicast_filter_bins = mcbins;
- mac->unicast_filter_entries = perfect_uc_entries;
+ priv->dev->priv_flags |= IFF_UNICAST_FLT;
+ mac->pcsr = priv->ioaddr;
+ mac->multicast_filter_bins = priv->plat->multicast_filter_bins;
+ mac->unicast_filter_entries = priv->plat->unicast_filter_entries;
mac->mcast_bits_log2 = 0;
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
- mac->mac = &dwmac1000_ops;
- mac->dma = &dwmac1000_dma_ops;
-
mac->link.duplex = GMAC_CONTROL_DM;
mac->link.speed10 = GMAC_CONTROL_PS;
mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
@@ -555,8 +549,5 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
mac->mii.clk_csr_shift = 2;
mac->mii.clk_csr_mask = GENMASK(5, 2);
- /* Get and dump the chip ID */
- *synopsys_id = stmmac_get_synopsys_id(hwid);
-
- return mac;
+ return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index 91b23f9db31a..b735143987e1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -27,6 +27,7 @@
#include <linux/crc32.h>
#include <net/dsa.h>
#include <asm/io.h>
+#include "stmmac.h"
#include "dwmac100.h"
static void dwmac100_core_init(struct mac_device_info *hw,
@@ -159,7 +160,7 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode)
return;
}
-static const struct stmmac_ops dwmac100_ops = {
+const struct stmmac_ops dwmac100_ops = {
.core_init = dwmac100_core_init,
.set_mac = stmmac_set_mac,
.rx_ipc = dwmac100_rx_ipc_enable,
@@ -172,20 +173,13 @@ static const struct stmmac_ops dwmac100_ops = {
.get_umac_addr = dwmac100_get_umac_addr,
};
-struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id)
+int dwmac100_setup(struct stmmac_priv *priv)
{
- struct mac_device_info *mac;
+ struct mac_device_info *mac = priv->hw;
- mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL);
- if (!mac)
- return NULL;
-
- pr_info("\tDWMAC100\n");
-
- mac->pcsr = ioaddr;
- mac->mac = &dwmac100_ops;
- mac->dma = &dwmac100_dma_ops;
+ dev_info(priv->device, "\tDWMAC100\n");
+ mac->pcsr = priv->ioaddr;
mac->link.duplex = MAC_CONTROL_F;
mac->link.speed10 = 0;
mac->link.speed100 = 0;
@@ -200,8 +194,5 @@ struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id)
mac->mii.clk_csr_shift = 2;
mac->mii.clk_csr_mask = GENMASK(5, 2);
- /* Synopsys Id is not available on old chips */
- *synopsys_id = 0;
-
- return mac;
+ return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index dedd40613090..03eab9077c1c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -34,7 +34,6 @@
#define GMAC_PCS_BASE 0x000000e0
#define GMAC_PHYIF_CONTROL_STATUS 0x000000f8
#define GMAC_PMT 0x000000c0
-#define GMAC_VERSION 0x00000110
#define GMAC_DEBUG 0x00000114
#define GMAC_HW_FEATURE0 0x0000011c
#define GMAC_HW_FEATURE1 0x00000120
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 517b1f6736a8..7289b3b47d8e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -18,6 +18,7 @@
#include <linux/ethtool.h>
#include <linux/io.h>
#include <net/dsa.h>
+#include "stmmac.h"
#include "stmmac_pcs.h"
#include "dwmac4.h"
#include "dwmac5.h"
@@ -700,7 +701,7 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
x->mac_gmii_rx_proto_engine++;
}
-static const struct stmmac_ops dwmac4_ops = {
+const struct stmmac_ops dwmac4_ops = {
.core_init = dwmac4_core_init,
.set_mac = stmmac_set_mac,
.rx_ipc = dwmac4_rx_ipc_enable,
@@ -731,7 +732,7 @@ static const struct stmmac_ops dwmac4_ops = {
.set_filter = dwmac4_set_filter,
};
-static const struct stmmac_ops dwmac410_ops = {
+const struct stmmac_ops dwmac410_ops = {
.core_init = dwmac4_core_init,
.set_mac = stmmac_dwmac4_set_mac,
.rx_ipc = dwmac4_rx_ipc_enable,
@@ -762,7 +763,7 @@ static const struct stmmac_ops dwmac410_ops = {
.set_filter = dwmac4_set_filter,
};
-static const struct stmmac_ops dwmac510_ops = {
+const struct stmmac_ops dwmac510_ops = {
.core_init = dwmac4_core_init,
.set_mac = stmmac_dwmac4_set_mac,
.rx_ipc = dwmac4_rx_ipc_enable,
@@ -796,19 +797,16 @@ static const struct stmmac_ops dwmac510_ops = {
.safety_feat_dump = dwmac5_safety_feat_dump,
};
-struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
- int perfect_uc_entries, int *synopsys_id)
+int dwmac4_setup(struct stmmac_priv *priv)
{
- struct mac_device_info *mac;
- u32 hwid = readl(ioaddr + GMAC_VERSION);
+ struct mac_device_info *mac = priv->hw;
- mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL);
- if (!mac)
- return NULL;
+ dev_info(priv->device, "\tDWMAC4/5\n");
- mac->pcsr = ioaddr;
- mac->multicast_filter_bins = mcbins;
- mac->unicast_filter_entries = perfect_uc_entries;
+ priv->dev->priv_flags |= IFF_UNICAST_FLT;
+ mac->pcsr = priv->ioaddr;
+ mac->multicast_filter_bins = priv->plat->multicast_filter_bins;
+ mac->unicast_filter_entries = priv->plat->unicast_filter_entries;
mac->mcast_bits_log2 = 0;
if (mac->multicast_filter_bins)
@@ -828,20 +826,5 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
mac->mii.clk_csr_shift = 8;
mac->mii.clk_csr_mask = GENMASK(11, 8);
- /* Get and dump the chip ID */
- *synopsys_id = stmmac_get_synopsys_id(hwid);
-
- if (*synopsys_id > DWMAC_CORE_4_00)
- mac->dma = &dwmac410_dma_ops;
- else
- mac->dma = &dwmac4_dma_ops;
-
- if (*synopsys_id >= DWMAC_CORE_5_10)
- mac->mac = &dwmac510_ops;
- else if (*synopsys_id >= DWMAC_CORE_4_00)
- mac->mac = &dwmac410_ops;
- else
- mac->mac = &dwmac4_ops;
-
- return mac;
+ return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 2a6521d33e43..65ed896c13cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -223,7 +223,7 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
return 0;
}
-static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
+static inline void dwmac4_get_timestamp(void *desc, u32 ats, u64 *ts)
{
struct dma_desc *p = (struct dma_desc *)desc;
u64 ns;
@@ -232,7 +232,7 @@ static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
/* convert high/sec time stamp value to nanosecond */
ns += le32_to_cpu(p->des1) * 1000000000ULL;
- return ns;
+ *ts = ns;
}
static int dwmac4_rx_check_timestamp(void *desc)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 860de39999c7..2978550bb7f6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -237,15 +237,16 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
return 0;
}
-bool dwmac5_safety_feat_irq_status(struct net_device *ndev,
+int dwmac5_safety_feat_irq_status(struct net_device *ndev,
void __iomem *ioaddr, unsigned int asp,
struct stmmac_safety_stats *stats)
{
- bool ret = false, err, corr;
+ bool err, corr;
u32 mtl, dma;
+ int ret = 0;
if (!asp)
- return false;
+ return -EINVAL;
mtl = readl(ioaddr + MTL_SAFETY_INT_STATUS);
dma = readl(ioaddr + DMA_SAFETY_INT_STATUS);
@@ -282,17 +283,19 @@ static const struct dwmac5_error {
{ dwmac5_dma_errors },
};
-const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
- int index, unsigned long *count)
+int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
+ int index, unsigned long *count, const char **desc)
{
int module = index / 32, offset = index % 32;
unsigned long *ptr = (unsigned long *)stats;
if (module >= ARRAY_SIZE(dwmac5_all_errors))
- return NULL;
+ return -EINVAL;
if (!dwmac5_all_errors[module].desc[offset].valid)
- return NULL;
+ return -EINVAL;
if (count)
*count = *(ptr + index);
- return dwmac5_all_errors[module].desc[offset].desc;
+ if (desc)
+ *desc = dwmac5_all_errors[module].desc[offset].desc;
+ return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index a0d2c44711b9..bd4c466d303e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -43,10 +43,10 @@
#define DMA_ECC_INT_STATUS 0x00001088
int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp);
-bool dwmac5_safety_feat_irq_status(struct net_device *ndev,
+int dwmac5_safety_feat_irq_status(struct net_device *ndev,
void __iomem *ioaddr, unsigned int asp,
struct stmmac_safety_stats *stats);
-const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
- int index, unsigned long *count);
+int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
+ int index, unsigned long *count, const char **desc);
#endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 6768a25b6aa0..3bfb3f584be2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -382,7 +382,7 @@ static int enh_desc_get_tx_timestamp_status(struct dma_desc *p)
return (le32_to_cpu(p->des0) & ETDES0_TIME_STAMP_STATUS) >> 17;
}
-static u64 enh_desc_get_timestamp(void *desc, u32 ats)
+static void enh_desc_get_timestamp(void *desc, u32 ats, u64 *ts)
{
u64 ns;
@@ -397,7 +397,7 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
ns += le32_to_cpu(p->des3) * 1000000000ULL;
}
- return ns;
+ *ts = ns;
}
static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
new file mode 100644
index 000000000000..2b0a7e79de00
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
+ * stmmac HW Interface Handling
+ */
+
+#include "common.h"
+#include "stmmac.h"
+
+static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg)
+{
+ u32 reg = readl(priv->ioaddr + id_reg);
+
+ if (!reg) {
+ dev_info(priv->device, "Version ID not available\n");
+ return 0x0;
+ }
+
+ dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n",
+ (unsigned int)(reg & GENMASK(15, 8)) >> 8,
+ (unsigned int)(reg & GENMASK(7, 0)));
+ return reg & GENMASK(7, 0);
+}
+
+static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv)
+{
+ struct mac_device_info *mac = priv->hw;
+
+ if (priv->chain_mode) {
+ dev_info(priv->device, "Chain mode enabled\n");
+ priv->mode = STMMAC_CHAIN_MODE;
+ mac->mode = &chain_mode_ops;
+ } else {
+ dev_info(priv->device, "Ring mode enabled\n");
+ priv->mode = STMMAC_RING_MODE;
+ mac->mode = &ring_mode_ops;
+ }
+}
+
+static int stmmac_dwmac1_quirks(struct stmmac_priv *priv)
+{
+ struct mac_device_info *mac = priv->hw;
+
+ if (priv->plat->enh_desc) {
+ dev_info(priv->device, "Enhanced/Alternate descriptors\n");
+
+ /* GMAC older than 3.50 has no extended descriptors */
+ if (priv->synopsys_id >= DWMAC_CORE_3_50) {
+ dev_info(priv->device, "Enabled extended descriptors\n");
+ priv->extend_desc = 1;
+ } else {
+ dev_warn(priv->device, "Extended descriptors not supported\n");
+ }
+
+ mac->desc = &enh_desc_ops;
+ } else {
+ dev_info(priv->device, "Normal descriptors\n");
+ mac->desc = &ndesc_ops;
+ }
+
+ stmmac_dwmac_mode_quirk(priv);
+ return 0;
+}
+
+static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
+{
+ stmmac_dwmac_mode_quirk(priv);
+ return 0;
+}
+
+static const struct stmmac_hwif_entry {
+ bool gmac;
+ bool gmac4;
+ u32 min_id;
+ const void *desc;
+ const void *dma;
+ const void *mac;
+ const void *hwtimestamp;
+ const void *mode;
+ int (*setup)(struct stmmac_priv *priv);
+ int (*quirks)(struct stmmac_priv *priv);
+} stmmac_hw[] = {
+ /* NOTE: New HW versions shall go to the end of this table */
+ {
+ .gmac = false,
+ .gmac4 = false,
+ .min_id = 0,
+ .desc = NULL,
+ .dma = &dwmac100_dma_ops,
+ .mac = &dwmac100_ops,
+ .hwtimestamp = &stmmac_ptp,
+ .mode = NULL,
+ .setup = dwmac100_setup,
+ .quirks = stmmac_dwmac1_quirks,
+ }, {
+ .gmac = true,
+ .gmac4 = false,
+ .min_id = 0,
+ .desc = NULL,
+ .dma = &dwmac1000_dma_ops,
+ .mac = &dwmac1000_ops,
+ .hwtimestamp = &stmmac_ptp,
+ .mode = NULL,
+ .setup = dwmac1000_setup,
+ .quirks = stmmac_dwmac1_quirks,
+ }, {
+ .gmac = false,
+ .gmac4 = true,
+ .min_id = 0,
+ .desc = &dwmac4_desc_ops,
+ .dma = &dwmac4_dma_ops,
+ .mac = &dwmac4_ops,
+ .hwtimestamp = &stmmac_ptp,
+ .mode = NULL,
+ .setup = dwmac4_setup,
+ .quirks = stmmac_dwmac4_quirks,
+ }, {
+ .gmac = false,
+ .gmac4 = true,
+ .min_id = DWMAC_CORE_4_00,
+ .desc = &dwmac4_desc_ops,
+ .dma = &dwmac4_dma_ops,
+ .mac = &dwmac410_ops,
+ .hwtimestamp = &stmmac_ptp,
+ .mode = &dwmac4_ring_mode_ops,
+ .setup = dwmac4_setup,
+ .quirks = NULL,
+ }, {
+ .gmac = false,
+ .gmac4 = true,
+ .min_id = DWMAC_CORE_4_10,
+ .desc = &dwmac4_desc_ops,
+ .dma = &dwmac410_dma_ops,
+ .mac = &dwmac410_ops,
+ .hwtimestamp = &stmmac_ptp,
+ .mode = &dwmac4_ring_mode_ops,
+ .setup = dwmac4_setup,
+ .quirks = NULL,
+ }, {
+ .gmac = false,
+ .gmac4 = true,
+ .min_id = DWMAC_CORE_5_10,
+ .desc = &dwmac4_desc_ops,
+ .dma = &dwmac410_dma_ops,
+ .mac = &dwmac510_ops,
+ .hwtimestamp = &stmmac_ptp,
+ .mode = &dwmac4_ring_mode_ops,
+ .setup = dwmac4_setup,
+ .quirks = NULL,
+ }
+};
+
+int stmmac_hwif_init(struct stmmac_priv *priv)
+{
+ bool needs_gmac4 = priv->plat->has_gmac4;
+ bool needs_gmac = priv->plat->has_gmac;
+ const struct stmmac_hwif_entry *entry;
+ struct mac_device_info *mac;
+ int i, ret;
+ u32 id;
+
+ if (needs_gmac) {
+ id = stmmac_get_id(priv, GMAC_VERSION);
+ } else {
+ id = stmmac_get_id(priv, GMAC4_VERSION);
+ }
+
+ /* Save ID for later use */
+ priv->synopsys_id = id;
+
+ /* Check for HW specific setup first */
+ if (priv->plat->setup) {
+ priv->hw = priv->plat->setup(priv);
+ if (!priv->hw)
+ return -ENOMEM;
+ return 0;
+ }
+
+ mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL);
+ if (!mac)
+ return -ENOMEM;
+
+ /* Fallback to generic HW */
+ for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) {
+ entry = &stmmac_hw[i];
+
+ if (needs_gmac ^ entry->gmac)
+ continue;
+ if (needs_gmac4 ^ entry->gmac4)
+ continue;
+ if (id < entry->min_id)
+ continue;
+
+ mac->desc = entry->desc;
+ mac->dma = entry->dma;
+ mac->mac = entry->mac;
+ mac->ptp = entry->hwtimestamp;
+ mac->mode = entry->mode;
+
+ priv->hw = mac;
+
+ /* Entry found */
+ ret = entry->setup(priv);
+ if (ret)
+ return ret;
+
+ /* Run quirks, if needed */
+ if (entry->quirks) {
+ ret = entry->quirks(priv);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+ }
+
+ dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n",
+ id, needs_gmac, needs_gmac4);
+ return -EINVAL;
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
new file mode 100644
index 000000000000..bfad61607f07
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
+// stmmac HW Interface Callbacks
+
+#ifndef __STMMAC_HWIF_H__
+#define __STMMAC_HWIF_H__
+
+#define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) \
+({ \
+ int __result = -EINVAL; \
+ if ((__priv)->hw->__module->__cname) { \
+ (__priv)->hw->__module->__cname((__arg0), ##__args); \
+ __result = 0; \
+ } \
+ __result; \
+})
+#define stmmac_do_callback(__priv, __module, __cname, __arg0, __args...) \
+({ \
+ int __result = -EINVAL; \
+ if ((__priv)->hw->__module->__cname) \
+ __result = (__priv)->hw->__module->__cname((__arg0), ##__args); \
+ __result; \
+})
+
+struct stmmac_extra_stats;
+struct stmmac_safety_stats;
+struct dma_desc;
+struct dma_extended_desc;
+
+/* Descriptors helpers */
+struct stmmac_desc_ops {
+ /* DMA RX descriptor ring initialization */
+ void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode,
+ int end);
+ /* DMA TX descriptor ring initialization */
+ void (*init_tx_desc)(struct dma_desc *p, int mode, int end);
+ /* Invoked by the xmit function to prepare the tx descriptor */
+ void (*prepare_tx_desc)(struct dma_desc *p, int is_fs, int len,
+ bool csum_flag, int mode, bool tx_own, bool ls,
+ unsigned int tot_pkt_len);
+ void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1,
+ int len2, bool tx_own, bool ls, unsigned int tcphdrlen,
+ unsigned int tcppayloadlen);
+ /* Set/get the owner of the descriptor */
+ void (*set_tx_owner)(struct dma_desc *p);
+ int (*get_tx_owner)(struct dma_desc *p);
+ /* Clean the tx descriptor as soon as the tx irq is received */
+ void (*release_tx_desc)(struct dma_desc *p, int mode);
+ /* Clear interrupt on tx frame completion. When this bit is
+ * set an interrupt happens as soon as the frame is transmitted */
+ void (*set_tx_ic)(struct dma_desc *p);
+ /* Last tx segment reports the transmit status */
+ int (*get_tx_ls)(struct dma_desc *p);
+ /* Return the transmit status looking at the TDES1 */
+ int (*tx_status)(void *data, struct stmmac_extra_stats *x,
+ struct dma_desc *p, void __iomem *ioaddr);
+ /* Get the buffer size from the descriptor */
+ int (*get_tx_len)(struct dma_desc *p);
+ /* Handle extra events on specific interrupts hw dependent */
+ void (*set_rx_owner)(struct dma_desc *p);
+ /* Get the receive frame size */
+ int (*get_rx_frame_len)(struct dma_desc *p, int rx_coe_type);
+ /* Return the reception status looking at the RDES1 */
+ int (*rx_status)(void *data, struct stmmac_extra_stats *x,
+ struct dma_desc *p);
+ void (*rx_extended_status)(void *data, struct stmmac_extra_stats *x,
+ struct dma_extended_desc *p);
+ /* Set tx timestamp enable bit */
+ void (*enable_tx_timestamp) (struct dma_desc *p);
+ /* get tx timestamp status */
+ int (*get_tx_timestamp_status) (struct dma_desc *p);
+ /* get timestamp value */
+ void (*get_timestamp)(void *desc, u32 ats, u64 *ts);
+ /* get rx timestamp status */
+ int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
+ /* Display ring */
+ void (*display_ring)(void *head, unsigned int size, bool rx);
+ /* set MSS via context descriptor */
+ void (*set_mss)(struct dma_desc *p, unsigned int mss);
+};
+
+#define stmmac_init_rx_desc(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, init_rx_desc, __args)
+#define stmmac_init_tx_desc(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, init_tx_desc, __args)
+#define stmmac_prepare_tx_desc(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, prepare_tx_desc, __args)
+#define stmmac_prepare_tso_tx_desc(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, prepare_tso_tx_desc, __args)
+#define stmmac_set_tx_owner(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_tx_owner, __args)
+#define stmmac_get_tx_owner(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_tx_owner, __args)
+#define stmmac_release_tx_desc(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, release_tx_desc, __args)
+#define stmmac_set_tx_ic(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_tx_ic, __args)
+#define stmmac_get_tx_ls(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_tx_ls, __args)
+#define stmmac_tx_status(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, tx_status, __args)
+#define stmmac_get_tx_len(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_tx_len, __args)
+#define stmmac_set_rx_owner(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_rx_owner, __args)
+#define stmmac_get_rx_frame_len(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_rx_frame_len, __args)
+#define stmmac_rx_status(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, rx_status, __args)
+#define stmmac_rx_extended_status(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, rx_extended_status, __args)
+#define stmmac_enable_tx_timestamp(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, enable_tx_timestamp, __args)
+#define stmmac_get_tx_timestamp_status(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_tx_timestamp_status, __args)
+#define stmmac_get_timestamp(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, get_timestamp, __args)
+#define stmmac_get_rx_timestamp_status(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_rx_timestamp_status, __args)
+#define stmmac_display_ring(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, display_ring, __args)
+#define stmmac_set_mss(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_mss, __args)
+
+struct stmmac_dma_cfg;
+struct dma_features;
+
+/* Specific DMA helpers */
+struct stmmac_dma_ops {
+ /* DMA core initialization */
+ int (*reset)(void __iomem *ioaddr);
+ void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_tx, u32 dma_rx, int atds);
+ void (*init_chan)(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg, u32 chan);
+ void (*init_rx_chan)(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_rx_phy, u32 chan);
+ void (*init_tx_chan)(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_tx_phy, u32 chan);
+ /* Configure the AXI Bus Mode Register */
+ void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
+ /* Dump DMA registers */
+ void (*dump_regs)(void __iomem *ioaddr, u32 *reg_space);
+ /* Set tx/rx threshold in the csr6 register
+ * An invalid value enables the store-and-forward mode */
+ void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
+ int rxfifosz);
+ void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+ int fifosz, u8 qmode);
+ void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+ int fifosz, u8 qmode);
+ /* To track extra statistic (if supported) */
+ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
+ void __iomem *ioaddr);
+ void (*enable_dma_transmission) (void __iomem *ioaddr);
+ void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
+ void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
+ void (*start_tx)(void __iomem *ioaddr, u32 chan);
+ void (*stop_tx)(void __iomem *ioaddr, u32 chan);
+ void (*start_rx)(void __iomem *ioaddr, u32 chan);
+ void (*stop_rx)(void __iomem *ioaddr, u32 chan);
+ int (*dma_interrupt) (void __iomem *ioaddr,
+ struct stmmac_extra_stats *x, u32 chan);
+ /* If supported then get the optional core features */
+ void (*get_hw_feature)(void __iomem *ioaddr,
+ struct dma_features *dma_cap);
+ /* Program the HW RX Watchdog */
+ void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+ void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
+ void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
+ void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
+ void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
+ void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
+};
+
+#define stmmac_reset(__priv, __args...) \
+ stmmac_do_callback(__priv, dma, reset, __args)
+#define stmmac_dma_init(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, init, __args)
+#define stmmac_init_chan(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, init_chan, __args)
+#define stmmac_init_rx_chan(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, init_rx_chan, __args)
+#define stmmac_init_tx_chan(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, init_tx_chan, __args)
+#define stmmac_axi(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, axi, __args)
+#define stmmac_dump_dma_regs(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, dump_regs, __args)
+#define stmmac_dma_mode(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, dma_mode, __args)
+#define stmmac_dma_rx_mode(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, dma_rx_mode, __args)
+#define stmmac_dma_tx_mode(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, dma_tx_mode, __args)
+#define stmmac_dma_diagnostic_fr(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, dma_diagnostic_fr, __args)
+#define stmmac_enable_dma_transmission(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, enable_dma_transmission, __args)
+#define stmmac_enable_dma_irq(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, enable_dma_irq, __args)
+#define stmmac_disable_dma_irq(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, disable_dma_irq, __args)
+#define stmmac_start_tx(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, start_tx, __args)
+#define stmmac_stop_tx(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, stop_tx, __args)
+#define stmmac_start_rx(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, start_rx, __args)
+#define stmmac_stop_rx(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, stop_rx, __args)
+#define stmmac_dma_interrupt_status(__priv, __args...) \
+ stmmac_do_callback(__priv, dma, dma_interrupt, __args)
+#define stmmac_get_hw_feature(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, get_hw_feature, __args)
+#define stmmac_rx_watchdog(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, rx_watchdog, __args)
+#define stmmac_set_tx_ring_len(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, set_tx_ring_len, __args)
+#define stmmac_set_rx_ring_len(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, set_rx_ring_len, __args)
+#define stmmac_set_rx_tail_ptr(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, set_rx_tail_ptr, __args)
+#define stmmac_set_tx_tail_ptr(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
+#define stmmac_enable_tso(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+
+struct mac_device_info;
+struct net_device;
+struct rgmii_adv;
+struct stmmac_safety_stats;
+
+/* Helpers to program the MAC core */
+struct stmmac_ops {
+ /* MAC core initialization */
+ void (*core_init)(struct mac_device_info *hw, struct net_device *dev);
+ /* Enable the MAC RX/TX */
+ void (*set_mac)(void __iomem *ioaddr, bool enable);
+ /* Enable and verify that the IPC module is supported */
+ int (*rx_ipc)(struct mac_device_info *hw);
+ /* Enable RX Queues */
+ void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue);
+ /* RX Queues Priority */
+ void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+ /* TX Queues Priority */
+ void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+ /* RX Queues Routing */
+ void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet,
+ u32 queue);
+ /* Program RX Algorithms */
+ void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg);
+ /* Program TX Algorithms */
+ void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
+ /* Set MTL TX queues weight */
+ void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
+ u32 weight, u32 queue);
+ /* RX MTL queue to RX dma mapping */
+ void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
+ /* Configure AV Algorithm */
+ void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
+ u32 idle_slope, u32 high_credit, u32 low_credit,
+ u32 queue);
+ /* Dump MAC registers */
+ void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
+ /* Handle extra events on specific interrupts hw dependent */
+ int (*host_irq_status)(struct mac_device_info *hw,
+ struct stmmac_extra_stats *x);
+ /* Handle MTL interrupts */
+ int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
+ /* Multicast filter setting */
+ void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
+ /* Flow control setting */
+ void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
+ unsigned int fc, unsigned int pause_time, u32 tx_cnt);
+ /* Set power management mode (e.g. magic frame) */
+ void (*pmt)(struct mac_device_info *hw, unsigned long mode);
+ /* Set/Get Unicast MAC addresses */
+ void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
+ unsigned int reg_n);
+ void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
+ unsigned int reg_n);
+ void (*set_eee_mode)(struct mac_device_info *hw,
+ bool en_tx_lpi_clockgating);
+ void (*reset_eee_mode)(struct mac_device_info *hw);
+ void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
+ void (*set_eee_pls)(struct mac_device_info *hw, int link);
+ void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+ u32 rx_queues, u32 tx_queues);
+ /* PCS calls */
+ void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
+ bool loopback);
+ void (*pcs_rane)(void __iomem *ioaddr, bool restart);
+ void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv);
+ /* Safety Features */
+ int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp);
+ int (*safety_feat_irq_status)(struct net_device *ndev,
+ void __iomem *ioaddr, unsigned int asp,
+ struct stmmac_safety_stats *stats);
+ int (*safety_feat_dump)(struct stmmac_safety_stats *stats,
+ int index, unsigned long *count, const char **desc);
+};
+
+#define stmmac_core_init(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, core_init, __args)
+#define stmmac_mac_set(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_mac, __args)
+#define stmmac_rx_ipc(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, rx_ipc, __args)
+#define stmmac_rx_queue_enable(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, rx_queue_enable, __args)
+#define stmmac_rx_queue_prio(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, rx_queue_prio, __args)
+#define stmmac_tx_queue_prio(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, tx_queue_prio, __args)
+#define stmmac_rx_queue_routing(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, rx_queue_routing, __args)
+#define stmmac_prog_mtl_rx_algorithms(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, prog_mtl_rx_algorithms, __args)
+#define stmmac_prog_mtl_tx_algorithms(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, prog_mtl_tx_algorithms, __args)
+#define stmmac_set_mtl_tx_queue_weight(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_mtl_tx_queue_weight, __args)
+#define stmmac_map_mtl_to_dma(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, map_mtl_to_dma, __args)
+#define stmmac_config_cbs(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, config_cbs, __args)
+#define stmmac_dump_mac_regs(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, dump_regs, __args)
+#define stmmac_host_irq_status(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, host_irq_status, __args)
+#define stmmac_host_mtl_irq_status(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, host_mtl_irq_status, __args)
+#define stmmac_set_filter(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_filter, __args)
+#define stmmac_flow_ctrl(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, flow_ctrl, __args)
+#define stmmac_pmt(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, pmt, __args)
+#define stmmac_set_umac_addr(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_umac_addr, __args)
+#define stmmac_get_umac_addr(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, get_umac_addr, __args)
+#define stmmac_set_eee_mode(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_eee_mode, __args)
+#define stmmac_reset_eee_mode(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args)
+#define stmmac_set_eee_timer(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_eee_timer, __args)
+#define stmmac_set_eee_pls(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_eee_pls, __args)
+#define stmmac_mac_debug(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, debug, __args)
+#define stmmac_pcs_ctrl_ane(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, pcs_ctrl_ane, __args)
+#define stmmac_pcs_rane(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, pcs_rane, __args)
+#define stmmac_pcs_get_adv_lp(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, pcs_get_adv_lp, __args)
+#define stmmac_safety_feat_config(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, safety_feat_config, __args)
+#define stmmac_safety_feat_irq_status(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, safety_feat_irq_status, __args)
+#define stmmac_safety_feat_dump(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, safety_feat_dump, __args)
+
+/* PTP and HW Timer helpers */
+struct stmmac_hwtimestamp {
+ void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
+ void (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock,
+ int gmac4, u32 *ssinc);
+ int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec);
+ int (*config_addend) (void __iomem *ioaddr, u32 addend);
+ int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
+ int add_sub, int gmac4);
+ void (*get_systime) (void __iomem *ioaddr, u64 *systime);
+};
+
+#define stmmac_config_hw_tstamping(__priv, __args...) \
+ stmmac_do_void_callback(__priv, ptp, config_hw_tstamping, __args)
+#define stmmac_config_sub_second_increment(__priv, __args...) \
+ stmmac_do_void_callback(__priv, ptp, config_sub_second_increment, __args)
+#define stmmac_init_systime(__priv, __args...) \
+ stmmac_do_callback(__priv, ptp, init_systime, __args)
+#define stmmac_config_addend(__priv, __args...) \
+ stmmac_do_callback(__priv, ptp, config_addend, __args)
+#define stmmac_adjust_systime(__priv, __args...) \
+ stmmac_do_callback(__priv, ptp, adjust_systime, __args)
+#define stmmac_get_systime(__priv, __args...) \
+ stmmac_do_void_callback(__priv, ptp, get_systime, __args)
+
+/* Helpers to manage the descriptors for chain and ring modes */
+struct stmmac_mode_ops {
+ void (*init) (void *des, dma_addr_t phy_addr, unsigned int size,
+ unsigned int extend_desc);
+ unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
+ int (*jumbo_frm)(void *priv, struct sk_buff *skb, int csum);
+ int (*set_16kib_bfsize)(int mtu);
+ void (*init_desc3)(struct dma_desc *p);
+ void (*refill_desc3) (void *priv, struct dma_desc *p);
+ void (*clean_desc3) (void *priv, struct dma_desc *p);
+};
+
+#define stmmac_mode_init(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mode, init, __args)
+#define stmmac_is_jumbo_frm(__priv, __args...) \
+ stmmac_do_callback(__priv, mode, is_jumbo_frm, __args)
+#define stmmac_jumbo_frm(__priv, __args...) \
+ stmmac_do_callback(__priv, mode, jumbo_frm, __args)
+#define stmmac_set_16kib_bfsize(__priv, __args...) \
+ stmmac_do_callback(__priv, mode, set_16kib_bfsize, __args)
+#define stmmac_init_desc3(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mode, init_desc3, __args)
+#define stmmac_refill_desc3(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mode, refill_desc3, __args)
+#define stmmac_clean_desc3(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mode, clean_desc3, __args)
+
+struct stmmac_priv;
+
+extern const struct stmmac_ops dwmac100_ops;
+extern const struct stmmac_dma_ops dwmac100_dma_ops;
+extern const struct stmmac_ops dwmac1000_ops;
+extern const struct stmmac_dma_ops dwmac1000_dma_ops;
+extern const struct stmmac_ops dwmac4_ops;
+extern const struct stmmac_dma_ops dwmac4_dma_ops;
+extern const struct stmmac_ops dwmac410_ops;
+extern const struct stmmac_dma_ops dwmac410_dma_ops;
+extern const struct stmmac_ops dwmac510_ops;
+
+#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */
+#define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */
+
+int stmmac_hwif_init(struct stmmac_priv *priv);
+
+#endif /* __STMMAC_HWIF_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index ebd9e5e00f16..7b1d901bf5bc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -253,7 +253,7 @@ static int ndesc_get_tx_timestamp_status(struct dma_desc *p)
return (le32_to_cpu(p->des0) & TDES0_TIME_STAMP_STATUS) >> 17;
}
-static u64 ndesc_get_timestamp(void *desc, u32 ats)
+static void ndesc_get_timestamp(void *desc, u32 ats, u64 *ts)
{
struct dma_desc *p = (struct dma_desc *)desc;
u64 ns;
@@ -262,7 +262,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
/* convert high/sec time stamp value to nanosecond */
ns += le32_to_cpu(p->des3) * 1000000000ULL;
- return ns;
+ *ts = ns;
}
static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index 28e4b5d50ce6..a7ffc73fffe8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -24,7 +24,7 @@
#include "stmmac.h"
-static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
{
struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
unsigned int nopaged_len = skb_headlen(skb);
@@ -58,9 +58,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
tx_q->tx_skbuff_dma[entry].is_jumbo = true;
desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
- priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
- STMMAC_RING_MODE, 0,
- false, skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum,
+ STMMAC_RING_MODE, 0, false, skb->len);
tx_q->tx_skbuff[entry] = NULL;
entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
@@ -79,9 +78,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
tx_q->tx_skbuff_dma[entry].is_jumbo = true;
desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
- priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
- STMMAC_RING_MODE, 1,
- true, skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 0, len, csum,
+ STMMAC_RING_MODE, 1, true, skb->len);
} else {
des2 = dma_map_single(priv->device, skb->data,
nopaged_len, DMA_TO_DEVICE);
@@ -92,9 +90,8 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
tx_q->tx_skbuff_dma[entry].len = nopaged_len;
tx_q->tx_skbuff_dma[entry].is_jumbo = true;
desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
- priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
- STMMAC_RING_MODE, 0,
- true, skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum,
+ STMMAC_RING_MODE, 0, true, skb->len);
}
tx_q->cur_tx = entry;
@@ -102,7 +99,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
return entry;
}
-static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
+static unsigned int is_jumbo_frm(int len, int enh_desc)
{
unsigned int ret = 0;
@@ -112,7 +109,7 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
return ret;
}
-static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
+static void refill_desc3(void *priv_ptr, struct dma_desc *p)
{
struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
@@ -122,12 +119,12 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
}
/* In ring mode we need to fill the desc3 because it is used as buffer */
-static void stmmac_init_desc3(struct dma_desc *p)
+static void init_desc3(struct dma_desc *p)
{
p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
}
-static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
+static void clean_desc3(void *priv_ptr, struct dma_desc *p)
{
struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
struct stmmac_priv *priv = tx_q->priv_data;
@@ -140,7 +137,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
p->des3 = 0;
}
-static int stmmac_set_16kib_bfsize(int mtu)
+static int set_16kib_bfsize(int mtu)
{
int ret = 0;
if (unlikely(mtu >= BUF_SIZE_8KiB))
@@ -149,10 +146,10 @@ static int stmmac_set_16kib_bfsize(int mtu)
}
const struct stmmac_mode_ops ring_mode_ops = {
- .is_jumbo_frm = stmmac_is_jumbo_frm,
- .jumbo_frm = stmmac_jumbo_frm,
- .refill_desc3 = stmmac_refill_desc3,
- .init_desc3 = stmmac_init_desc3,
- .clean_desc3 = stmmac_clean_desc3,
- .set_16kib_bfsize = stmmac_set_16kib_bfsize,
+ .is_jumbo_frm = is_jumbo_frm,
+ .jumbo_frm = jumbo_frm,
+ .refill_desc3 = refill_desc3,
+ .init_desc3 = init_desc3,
+ .clean_desc3 = clean_desc3,
+ .set_16kib_bfsize = set_16kib_bfsize,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index da50451f8999..2443f20e07bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -130,6 +130,7 @@ struct stmmac_priv {
int eee_active;
int tx_lpi_timer;
unsigned int mode;
+ unsigned int chain_mode;
int extend_desc;
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_ops;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2c6ed47704fc..6d82b3ef5c3b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -291,11 +291,9 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
cmd->base.speed = priv->xstats.pcs_speed;
/* Get and convert ADV/LP_ADV from the HW AN registers */
- if (!priv->hw->mac->pcs_get_adv_lp)
+ if (stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv))
return -EOPNOTSUPP; /* should never happen indeed */
- priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv);
-
/* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */
ethtool_convert_link_mode_to_legacy_u32(
@@ -393,11 +391,7 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev,
ADVERTISED_10baseT_Full);
spin_lock(&priv->lock);
-
- if (priv->hw->mac->pcs_ctrl_ane)
- priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1,
- priv->hw->ps, 0);
-
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
spin_unlock(&priv->lock);
return 0;
@@ -442,8 +436,8 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
memset(reg_space, 0x0, REG_SPACE_SIZE);
- priv->hw->mac->dump_regs(priv->hw, reg_space);
- priv->hw->dma->dump_regs(priv->ioaddr, reg_space);
+ stmmac_dump_mac_regs(priv, priv->hw, reg_space);
+ stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);
/* Copy DMA registers to where ethtool expects them */
memcpy(&reg_space[ETHTOOL_DMA_OFFSET], &reg_space[DMA_BUS_MODE / 4],
NUM_DWMAC1000_DMA_REGS * 4);
@@ -454,15 +448,13 @@ stmmac_get_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pause)
{
struct stmmac_priv *priv = netdev_priv(netdev);
+ struct rgmii_adv adv_lp;
pause->rx_pause = 0;
pause->tx_pause = 0;
- if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) {
- struct rgmii_adv adv_lp;
-
+ if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) {
pause->autoneg = 1;
- priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv_lp);
if (!adv_lp.pause)
return;
} else {
@@ -488,12 +480,10 @@ stmmac_set_pauseparam(struct net_device *netdev,
u32 tx_cnt = priv->plat->tx_queues_to_use;
struct phy_device *phy = netdev->phydev;
int new_pause = FLOW_OFF;
+ struct rgmii_adv adv_lp;
- if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) {
- struct rgmii_adv adv_lp;
-
+ if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) {
pause->autoneg = 1;
- priv->hw->mac->pcs_get_adv_lp(priv->ioaddr, &adv_lp);
if (!adv_lp.pause)
return -EOPNOTSUPP;
} else {
@@ -515,37 +505,32 @@ stmmac_set_pauseparam(struct net_device *netdev,
return phy_start_aneg(phy);
}
- priv->hw->mac->flow_ctrl(priv->hw, phy->duplex, priv->flow_ctrl,
- priv->pause, tx_cnt);
+ stmmac_flow_ctrl(priv, priv->hw, phy->duplex, priv->flow_ctrl,
+ priv->pause, tx_cnt);
return 0;
}
static void stmmac_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *dummy, u64 *data)
{
- const char *(*dump)(struct stmmac_safety_stats *stats, int index,
- unsigned long *count);
struct stmmac_priv *priv = netdev_priv(dev);
u32 rx_queues_count = priv->plat->rx_queues_to_use;
u32 tx_queues_count = priv->plat->tx_queues_to_use;
unsigned long count;
- int i, j = 0;
-
- if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
- dump = priv->hw->mac->safety_feat_dump;
+ int i, j = 0, ret;
+ if (priv->dma_cap.asp) {
for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
- if (dump(&priv->sstats, i, &count))
+ if (!stmmac_safety_feat_dump(priv, &priv->sstats, i,
+ &count, NULL))
data[j++] = count;
}
}
/* Update the DMA HW counters for dwmac10/100 */
- if (priv->hw->dma->dma_diagnostic_fr)
- priv->hw->dma->dma_diagnostic_fr(&dev->stats,
- (void *) &priv->xstats,
- priv->ioaddr);
- else {
+ ret = stmmac_dma_diagnostic_fr(priv, &dev->stats, (void *) &priv->xstats,
+ priv->ioaddr);
+ if (ret) {
/* If supported, for new GMAC chips expose the MMC counters */
if (priv->dma_cap.rmon) {
dwmac_mmc_read(priv->mmcaddr, &priv->mmc);
@@ -565,11 +550,10 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
priv->xstats.phy_eee_wakeup_error_n = val;
}
- if ((priv->hw->mac->debug) &&
- (priv->synopsys_id >= DWMAC_CORE_3_50))
- priv->hw->mac->debug(priv->ioaddr,
- (void *)&priv->xstats,
- rx_queues_count, tx_queues_count);
+ if (priv->synopsys_id >= DWMAC_CORE_3_50)
+ stmmac_mac_debug(priv, priv->ioaddr,
+ (void *)&priv->xstats,
+ rx_queues_count, tx_queues_count);
}
for (i = 0; i < STMMAC_STATS_LEN; i++) {
char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
@@ -581,8 +565,6 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
static int stmmac_get_sset_count(struct net_device *netdev, int sset)
{
struct stmmac_priv *priv = netdev_priv(netdev);
- const char *(*dump)(struct stmmac_safety_stats *stats, int index,
- unsigned long *count);
int i, len, safety_len = 0;
switch (sset) {
@@ -591,11 +573,11 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
if (priv->dma_cap.rmon)
len += STMMAC_MMC_STATS_LEN;
- if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
- dump = priv->hw->mac->safety_feat_dump;
-
+ if (priv->dma_cap.asp) {
for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
- if (dump(&priv->sstats, i, NULL))
+ if (!stmmac_safety_feat_dump(priv,
+ &priv->sstats, i,
+ NULL, NULL))
safety_len++;
}
@@ -613,17 +595,15 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
int i;
u8 *p = data;
struct stmmac_priv *priv = netdev_priv(dev);
- const char *(*dump)(struct stmmac_safety_stats *stats, int index,
- unsigned long *count);
switch (stringset) {
case ETH_SS_STATS:
- if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
- dump = priv->hw->mac->safety_feat_dump;
+ if (priv->dma_cap.asp) {
for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
- const char *desc = dump(&priv->sstats, i, NULL);
-
- if (desc) {
+ const char *desc;
+ if (!stmmac_safety_feat_dump(priv,
+ &priv->sstats, i,
+ NULL, &desc)) {
memcpy(p, desc, ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
}
@@ -810,7 +790,7 @@ static int stmmac_set_coalesce(struct net_device *dev,
priv->tx_coal_frames = ec->tx_max_coalesced_frames;
priv->tx_coal_timer = ec->tx_coalesce_usecs;
priv->rx_riwt = rx_riwt;
- priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt, rx_cnt);
+ stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 08c19ebd5306..8d9cc2157afd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -24,13 +24,13 @@
#include "common.h"
#include "stmmac_ptp.h"
-static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data)
+static void config_hw_tstamping(void __iomem *ioaddr, u32 data)
{
writel(data, ioaddr + PTP_TCR);
}
-static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
- u32 ptp_clock, int gmac4)
+static void config_sub_second_increment(void __iomem *ioaddr,
+ u32 ptp_clock, int gmac4, u32 *ssinc)
{
u32 value = readl(ioaddr + PTP_TCR);
unsigned long data;
@@ -57,10 +57,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
writel(reg_value, ioaddr + PTP_SSIR);
- return data;
+ if (ssinc)
+ *ssinc = data;
}
-static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
+static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
{
int limit;
u32 value;
@@ -85,7 +86,7 @@ static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
return 0;
}
-static int stmmac_config_addend(void __iomem *ioaddr, u32 addend)
+static int config_addend(void __iomem *ioaddr, u32 addend)
{
u32 value;
int limit;
@@ -109,8 +110,8 @@ static int stmmac_config_addend(void __iomem *ioaddr, u32 addend)
return 0;
}
-static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
- int add_sub, int gmac4)
+static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
+ int add_sub, int gmac4)
{
u32 value;
int limit;
@@ -152,7 +153,7 @@ static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
return 0;
}
-static u64 stmmac_get_systime(void __iomem *ioaddr)
+static void get_systime(void __iomem *ioaddr, u64 *systime)
{
u64 ns;
@@ -161,14 +162,15 @@ static u64 stmmac_get_systime(void __iomem *ioaddr)
/* Get the TSS and convert sec time value to nanosecond */
ns += readl(ioaddr + PTP_STSR) * 1000000000ULL;
- return ns;
+ if (systime)
+ *systime = ns;
}
const struct stmmac_hwtimestamp stmmac_ptp = {
- .config_hw_tstamping = stmmac_config_hw_tstamping,
- .init_systime = stmmac_init_systime,
- .config_sub_second_increment = stmmac_config_sub_second_increment,
- .config_addend = stmmac_config_addend,
- .adjust_systime = stmmac_adjust_systime,
- .get_systime = stmmac_get_systime,
+ .config_hw_tstamping = config_hw_tstamping,
+ .init_systime = init_systime,
+ .config_sub_second_increment = config_sub_second_increment,
+ .config_addend = config_addend,
+ .adjust_systime = adjust_systime,
+ .get_systime = get_systime,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b65e2d144698..b935478df55f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -50,6 +50,7 @@
#include <linux/reset.h>
#include <linux/of_mdio.h>
#include "dwmac1000.h"
+#include "hwif.h"
#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x)
#define TSO_MAX_BUFF_SIZE (SZ_16K - 1)
@@ -335,8 +336,8 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
/* Check and enter in LPI mode */
if (!priv->tx_path_in_lpi_mode)
- priv->hw->mac->set_eee_mode(priv->hw,
- priv->plat->en_tx_lpi_clockgating);
+ stmmac_set_eee_mode(priv, priv->hw,
+ priv->plat->en_tx_lpi_clockgating);
}
/**
@@ -347,7 +348,7 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
*/
void stmmac_disable_eee_mode(struct stmmac_priv *priv)
{
- priv->hw->mac->reset_eee_mode(priv->hw);
+ stmmac_reset_eee_mode(priv, priv->hw);
del_timer_sync(&priv->eee_ctrl_timer);
priv->tx_path_in_lpi_mode = false;
}
@@ -410,8 +411,8 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
if (priv->eee_active) {
netdev_dbg(priv->dev, "disable EEE\n");
del_timer_sync(&priv->eee_ctrl_timer);
- priv->hw->mac->set_eee_timer(priv->hw, 0,
- tx_lpi_timer);
+ stmmac_set_eee_timer(priv, priv->hw, 0,
+ tx_lpi_timer);
}
priv->eee_active = 0;
spin_unlock_irqrestore(&priv->lock, flags);
@@ -426,12 +427,11 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
mod_timer(&priv->eee_ctrl_timer,
STMMAC_LPI_T(eee_timer));
- priv->hw->mac->set_eee_timer(priv->hw,
- STMMAC_DEFAULT_LIT_LS,
- tx_lpi_timer);
+ stmmac_set_eee_timer(priv, priv->hw,
+ STMMAC_DEFAULT_LIT_LS, tx_lpi_timer);
}
/* Set HW EEE according to the speed */
- priv->hw->mac->set_eee_pls(priv->hw, ndev->phydev->link);
+ stmmac_set_eee_pls(priv, priv->hw, ndev->phydev->link);
ret = true;
spin_unlock_irqrestore(&priv->lock, flags);
@@ -464,9 +464,9 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
return;
/* check tx tstamp status */
- if (priv->hw->desc->get_tx_timestamp_status(p)) {
+ if (stmmac_get_tx_timestamp_status(priv, p)) {
/* get the valid tstamp */
- ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
+ stmmac_get_timestamp(priv, p, priv->adv_ts, &ns);
memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
shhwtstamp.hwtstamp = ns_to_ktime(ns);
@@ -502,8 +502,8 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
desc = np;
/* Check if timestamp is available */
- if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
- ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
+ if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
+ stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
shhwtstamp = skb_hwtstamps(skb);
memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
@@ -707,18 +707,18 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
if (!priv->hwts_tx_en && !priv->hwts_rx_en)
- priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, 0);
+ stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0);
else {
value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
tstamp_all | ptp_v2 | ptp_over_ethernet |
ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
ts_master_en | snap_type_sel);
- priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, value);
+ stmmac_config_hw_tstamping(priv, priv->ptpaddr, value);
/* program Sub Second Increment reg */
- sec_inc = priv->hw->ptp->config_sub_second_increment(
- priv->ptpaddr, priv->plat->clk_ptp_rate,
- priv->plat->has_gmac4);
+ stmmac_config_sub_second_increment(priv,
+ priv->ptpaddr, priv->plat->clk_ptp_rate,
+ priv->plat->has_gmac4, &sec_inc);
temp = div_u64(1000000000ULL, sec_inc);
/* calculate default added value:
@@ -728,15 +728,14 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
*/
temp = (u64)(temp << 32);
priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
- priv->hw->ptp->config_addend(priv->ptpaddr,
- priv->default_addend);
+ stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
/* initialize system time */
ktime_get_real_ts64(&now);
/* lower 32 bits of tv_sec are safe until y2106 */
- priv->hw->ptp->init_systime(priv->ptpaddr, (u32)now.tv_sec,
- now.tv_nsec);
+ stmmac_init_systime(priv, priv->ptpaddr,
+ (u32)now.tv_sec, now.tv_nsec);
}
return copy_to_user(ifr->ifr_data, &config,
@@ -770,7 +769,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
netdev_info(priv->dev,
"IEEE 1588-2008 Advanced Timestamp supported\n");
- priv->hw->ptp = &stmmac_ptp;
priv->hwts_tx_en = 0;
priv->hwts_rx_en = 0;
@@ -795,8 +793,8 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
{
u32 tx_cnt = priv->plat->tx_queues_to_use;
- priv->hw->mac->flow_ctrl(priv->hw, duplex, priv->flow_ctrl,
- priv->pause, tx_cnt);
+ stmmac_flow_ctrl(priv, priv->hw, duplex, priv->flow_ctrl,
+ priv->pause, tx_cnt);
}
/**
@@ -1008,7 +1006,7 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv)
head_rx = (void *)rx_q->dma_rx;
/* Display RX ring */
- priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true);
+ stmmac_display_ring(priv, head_rx, DMA_RX_SIZE, true);
}
}
@@ -1029,7 +1027,7 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv)
else
head_tx = (void *)tx_q->dma_tx;
- priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false);
+ stmmac_display_ring(priv, head_tx, DMA_TX_SIZE, false);
}
}
@@ -1073,13 +1071,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue)
/* Clear the RX descriptors */
for (i = 0; i < DMA_RX_SIZE; i++)
if (priv->extend_desc)
- priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic,
- priv->use_riwt, priv->mode,
- (i == DMA_RX_SIZE - 1));
+ stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic,
+ priv->use_riwt, priv->mode,
+ (i == DMA_RX_SIZE - 1));
else
- priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i],
- priv->use_riwt, priv->mode,
- (i == DMA_RX_SIZE - 1));
+ stmmac_init_rx_desc(priv, &rx_q->dma_rx[i],
+ priv->use_riwt, priv->mode,
+ (i == DMA_RX_SIZE - 1));
}
/**
@@ -1097,13 +1095,11 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue)
/* Clear the TX descriptors */
for (i = 0; i < DMA_TX_SIZE; i++)
if (priv->extend_desc)
- priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
- priv->mode,
- (i == DMA_TX_SIZE - 1));
+ stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic,
+ priv->mode, (i == DMA_TX_SIZE - 1));
else
- priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
- priv->mode,
- (i == DMA_TX_SIZE - 1));
+ stmmac_init_tx_desc(priv, &tx_q->dma_tx[i],
+ priv->mode, (i == DMA_TX_SIZE - 1));
}
/**
@@ -1164,9 +1160,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
else
p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
- if ((priv->hw->mode->init_desc3) &&
- (priv->dma_buf_sz == BUF_SIZE_16KiB))
- priv->hw->mode->init_desc3(p);
+ if (priv->dma_buf_sz == BUF_SIZE_16KiB)
+ stmmac_init_desc3(priv, p);
return 0;
}
@@ -1232,13 +1227,14 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
{
struct stmmac_priv *priv = netdev_priv(dev);
u32 rx_count = priv->plat->rx_queues_to_use;
- unsigned int bfsize = 0;
int ret = -ENOMEM;
+ int bfsize = 0;
int queue;
int i;
- if (priv->hw->mode->set_16kib_bfsize)
- bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu);
+ bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu);
+ if (bfsize < 0)
+ bfsize = 0;
if (bfsize < BUF_SIZE_16KiB)
bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
@@ -1282,13 +1278,11 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
/* Setup the chained descriptor addresses */
if (priv->mode == STMMAC_CHAIN_MODE) {
if (priv->extend_desc)
- priv->hw->mode->init(rx_q->dma_erx,
- rx_q->dma_rx_phy,
- DMA_RX_SIZE, 1);
+ stmmac_mode_init(priv, rx_q->dma_erx,
+ rx_q->dma_rx_phy, DMA_RX_SIZE, 1);
else
- priv->hw->mode->init(rx_q->dma_rx,
- rx_q->dma_rx_phy,
- DMA_RX_SIZE, 0);
+ stmmac_mode_init(priv, rx_q->dma_rx,
+ rx_q->dma_rx_phy, DMA_RX_SIZE, 0);
}
}
@@ -1335,13 +1329,11 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
/* Setup the chained descriptor addresses */
if (priv->mode == STMMAC_CHAIN_MODE) {
if (priv->extend_desc)
- priv->hw->mode->init(tx_q->dma_etx,
- tx_q->dma_tx_phy,
- DMA_TX_SIZE, 1);
+ stmmac_mode_init(priv, tx_q->dma_etx,
+ tx_q->dma_tx_phy, DMA_TX_SIZE, 1);
else
- priv->hw->mode->init(tx_q->dma_tx,
- tx_q->dma_tx_phy,
- DMA_TX_SIZE, 0);
+ stmmac_mode_init(priv, tx_q->dma_tx,
+ tx_q->dma_tx_phy, DMA_TX_SIZE, 0);
}
for (i = 0; i < DMA_TX_SIZE; i++) {
@@ -1664,7 +1656,7 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
for (queue = 0; queue < rx_queues_count; queue++) {
mode = priv->plat->rx_queues_cfg[queue].mode_to_use;
- priv->hw->mac->rx_queue_enable(priv->hw, mode, queue);
+ stmmac_rx_queue_enable(priv, priv->hw, mode, queue);
}
}
@@ -1678,7 +1670,7 @@ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
{
netdev_dbg(priv->dev, "DMA RX processes started in channel %d\n", chan);
- priv->hw->dma->start_rx(priv->ioaddr, chan);
+ stmmac_start_rx(priv, priv->ioaddr, chan);
}
/**
@@ -1691,7 +1683,7 @@ static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
{
netdev_dbg(priv->dev, "DMA TX processes started in channel %d\n", chan);
- priv->hw->dma->start_tx(priv->ioaddr, chan);
+ stmmac_start_tx(priv, priv->ioaddr, chan);
}
/**
@@ -1704,7 +1696,7 @@ static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
{
netdev_dbg(priv->dev, "DMA RX processes stopped in channel %d\n", chan);
- priv->hw->dma->stop_rx(priv->ioaddr, chan);
+ stmmac_stop_rx(priv, priv->ioaddr, chan);
}
/**
@@ -1717,7 +1709,7 @@ static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
{
netdev_dbg(priv->dev, "DMA TX processes stopped in channel %d\n", chan);
- priv->hw->dma->stop_tx(priv->ioaddr, chan);
+ stmmac_stop_tx(priv, priv->ioaddr, chan);
}
/**
@@ -1808,19 +1800,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
for (chan = 0; chan < rx_channels_count; chan++) {
qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
- priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
- rxfifosz, qmode);
+ stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
+ rxfifosz, qmode);
}
for (chan = 0; chan < tx_channels_count; chan++) {
qmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
- priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
- txfifosz, qmode);
+ stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan,
+ txfifosz, qmode);
}
} else {
- priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
- rxfifosz);
+ stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz);
}
}
@@ -1851,9 +1842,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
else
p = tx_q->dma_tx + entry;
- status = priv->hw->desc->tx_status(&priv->dev->stats,
- &priv->xstats, p,
- priv->ioaddr);
+ status = stmmac_tx_status(priv, &priv->dev->stats,
+ &priv->xstats, p, priv->ioaddr);
/* Check if the descriptor is owned by the DMA */
if (unlikely(status & tx_dma_own))
break;
@@ -1891,8 +1881,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
tx_q->tx_skbuff_dma[entry].map_as_page = false;
}
- if (priv->hw->mode->clean_desc3)
- priv->hw->mode->clean_desc3(tx_q, p);
+ stmmac_clean_desc3(priv, tx_q, p);
tx_q->tx_skbuff_dma[entry].last_segment = false;
tx_q->tx_skbuff_dma[entry].is_jumbo = false;
@@ -1904,7 +1893,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
tx_q->tx_skbuff[entry] = NULL;
}
- priv->hw->desc->release_tx_desc(p, priv->mode);
+ stmmac_release_tx_desc(priv, p, priv->mode);
entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
}
@@ -1929,16 +1918,6 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
netif_tx_unlock(priv->dev);
}
-static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv, u32 chan)
-{
- priv->hw->dma->enable_dma_irq(priv->ioaddr, chan);
-}
-
-static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv, u32 chan)
-{
- priv->hw->dma->disable_dma_irq(priv->ioaddr, chan);
-}
-
/**
* stmmac_tx_err - to manage the tx error
* @priv: driver private structure
@@ -1957,13 +1936,11 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
dma_free_tx_skbufs(priv, chan);
for (i = 0; i < DMA_TX_SIZE; i++)
if (priv->extend_desc)
- priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
- priv->mode,
- (i == DMA_TX_SIZE - 1));
+ stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic,
+ priv->mode, (i == DMA_TX_SIZE - 1));
else
- priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
- priv->mode,
- (i == DMA_TX_SIZE - 1));
+ stmmac_init_tx_desc(priv, &tx_q->dma_tx[i],
+ priv->mode, (i == DMA_TX_SIZE - 1));
tx_q->dirty_tx = 0;
tx_q->cur_tx = 0;
tx_q->mss = 0;
@@ -2004,30 +1981,30 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
txfifosz /= tx_channels_count;
if (priv->synopsys_id >= DWMAC_CORE_4_00) {
- priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
- rxfifosz, rxqmode);
- priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
- txfifosz, txqmode);
+ stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz,
+ rxqmode);
+ stmmac_dma_tx_mode(priv, priv->ioaddr, txmode, chan, txfifosz,
+ txqmode);
} else {
- priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
- rxfifosz);
+ stmmac_dma_mode(priv, priv->ioaddr, txmode, rxmode, rxfifosz);
}
}
static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
{
- bool ret = false;
+ int ret = false;
/* Safety features are only available in cores >= 5.10 */
if (priv->synopsys_id < DWMAC_CORE_5_10)
return ret;
- if (priv->hw->mac->safety_feat_irq_status)
- ret = priv->hw->mac->safety_feat_irq_status(priv->dev,
- priv->ioaddr, priv->dma_cap.asp, &priv->sstats);
-
- if (ret)
+ ret = stmmac_safety_feat_irq_status(priv, priv->dev,
+ priv->ioaddr, priv->dma_cap.asp, &priv->sstats);
+ if (ret && (ret != -EINVAL)) {
stmmac_global_err(priv);
- return ret;
+ return true;
+ }
+
+ return false;
}
/**
@@ -2045,7 +2022,11 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
tx_channel_count : rx_channel_count;
u32 chan;
bool poll_scheduled = false;
- int status[channels_to_check];
+ int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
+
+ /* Make sure we never check beyond our status buffer. */
+ if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
+ channels_to_check = ARRAY_SIZE(status);
/* Each DMA channel can be used for rx and tx simultaneously, yet
* napi_struct is embedded in struct stmmac_rx_queue rather than in a
@@ -2054,16 +2035,15 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
* all tx queues rather than just a single tx queue.
*/
for (chan = 0; chan < channels_to_check; chan++)
- status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr,
- &priv->xstats,
- chan);
+ status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
+ &priv->xstats, chan);
for (chan = 0; chan < rx_channel_count; chan++) {
if (likely(status[chan] & handle_rx)) {
struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
if (likely(napi_schedule_prep(&rx_q->napi))) {
- stmmac_disable_dma_irq(priv, chan);
+ stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
__napi_schedule(&rx_q->napi);
poll_scheduled = true;
}
@@ -2084,7 +2064,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
&priv->rx_queue[0];
if (likely(napi_schedule_prep(&rx_q->napi))) {
- stmmac_disable_dma_irq(priv, chan);
+ stmmac_disable_dma_irq(priv,
+ priv->ioaddr, chan);
__napi_schedule(&rx_q->napi);
}
break;
@@ -2144,32 +2125,6 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
}
/**
- * stmmac_selec_desc_mode - to select among: normal/alternate/extend descriptors
- * @priv: driver private structure
- * Description: select the Enhanced/Alternate or Normal descriptors.
- * In case of Enhanced/Alternate, it checks if the extended descriptors are
- * supported by the HW capability register.
- */
-static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
-{
- if (priv->plat->enh_desc) {
- dev_info(priv->device, "Enhanced/Alternate descriptors\n");
-
- /* GMAC older than 3.50 has no extended descriptors */
- if (priv->synopsys_id >= DWMAC_CORE_3_50) {
- dev_info(priv->device, "Enabled extended descriptors\n");
- priv->extend_desc = 1;
- } else
- dev_warn(priv->device, "Extended descriptors not supported\n");
-
- priv->hw->desc = &enh_desc_ops;
- } else {
- dev_info(priv->device, "Normal descriptors\n");
- priv->hw->desc = &ndesc_ops;
- }
-}
-
-/**
* stmmac_get_hw_features - get MAC capabilities from the HW cap. register.
* @priv: driver private structure
* Description:
@@ -2180,15 +2135,7 @@ static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
*/
static int stmmac_get_hw_features(struct stmmac_priv *priv)
{
- u32 ret = 0;
-
- if (priv->hw->dma->get_hw_feature) {
- priv->hw->dma->get_hw_feature(priv->ioaddr,
- &priv->dma_cap);
- ret = 1;
- }
-
- return ret;
+ return stmmac_get_hw_feature(priv, priv->ioaddr, &priv->dma_cap) == 0;
}
/**
@@ -2201,8 +2148,7 @@ static int stmmac_get_hw_features(struct stmmac_priv *priv)
static void stmmac_check_ether_addr(struct stmmac_priv *priv)
{
if (!is_valid_ether_addr(priv->dev->dev_addr)) {
- priv->hw->mac->get_umac_addr(priv->hw,
- priv->dev->dev_addr, 0);
+ stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0);
if (!is_valid_ether_addr(priv->dev->dev_addr))
eth_hw_addr_random(priv->dev);
netdev_info(priv->dev, "device MAC address %pM\n",
@@ -2238,7 +2184,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
atds = 1;
- ret = priv->hw->dma->reset(priv->ioaddr);
+ ret = stmmac_reset(priv, priv->ioaddr);
if (ret) {
dev_err(priv->device, "Failed to reset the dma\n");
return ret;
@@ -2246,51 +2192,48 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
if (priv->synopsys_id >= DWMAC_CORE_4_00) {
/* DMA Configuration */
- priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
- dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
+ stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg,
+ dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
/* DMA RX Channel Configuration */
for (chan = 0; chan < rx_channels_count; chan++) {
rx_q = &priv->rx_queue[chan];
- priv->hw->dma->init_rx_chan(priv->ioaddr,
- priv->plat->dma_cfg,
- rx_q->dma_rx_phy, chan);
+ stmmac_init_rx_chan(priv, priv->ioaddr,
+ priv->plat->dma_cfg, rx_q->dma_rx_phy,
+ chan);
rx_q->rx_tail_addr = rx_q->dma_rx_phy +
(DMA_RX_SIZE * sizeof(struct dma_desc));
- priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
- rx_q->rx_tail_addr,
- chan);
+ stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+ rx_q->rx_tail_addr, chan);
}
/* DMA TX Channel Configuration */
for (chan = 0; chan < tx_channels_count; chan++) {
tx_q = &priv->tx_queue[chan];
- priv->hw->dma->init_chan(priv->ioaddr,
- priv->plat->dma_cfg,
- chan);
+ stmmac_init_chan(priv, priv->ioaddr,
+ priv->plat->dma_cfg, chan);
- priv->hw->dma->init_tx_chan(priv->ioaddr,
- priv->plat->dma_cfg,
- tx_q->dma_tx_phy, chan);
+ stmmac_init_tx_chan(priv, priv->ioaddr,
+ priv->plat->dma_cfg, tx_q->dma_tx_phy,
+ chan);
tx_q->tx_tail_addr = tx_q->dma_tx_phy +
(DMA_TX_SIZE * sizeof(struct dma_desc));
- priv->hw->dma->set_tx_tail_ptr(priv->ioaddr,
- tx_q->tx_tail_addr,
- chan);
+ stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+ tx_q->tx_tail_addr, chan);
}
} else {
rx_q = &priv->rx_queue[chan];
tx_q = &priv->tx_queue[chan];
- priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
- tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
+ stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg,
+ tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
}
- if (priv->plat->axi && priv->hw->dma->axi)
- priv->hw->dma->axi(priv->ioaddr, priv->plat->axi);
+ if (priv->plat->axi)
+ stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
return ret;
}
@@ -2336,18 +2279,14 @@ static void stmmac_set_rings_length(struct stmmac_priv *priv)
u32 chan;
/* set TX ring length */
- if (priv->hw->dma->set_tx_ring_len) {
- for (chan = 0; chan < tx_channels_count; chan++)
- priv->hw->dma->set_tx_ring_len(priv->ioaddr,
- (DMA_TX_SIZE - 1), chan);
- }
+ for (chan = 0; chan < tx_channels_count; chan++)
+ stmmac_set_tx_ring_len(priv, priv->ioaddr,
+ (DMA_TX_SIZE - 1), chan);
/* set RX ring length */
- if (priv->hw->dma->set_rx_ring_len) {
- for (chan = 0; chan < rx_channels_count; chan++)
- priv->hw->dma->set_rx_ring_len(priv->ioaddr,
- (DMA_RX_SIZE - 1), chan);
- }
+ for (chan = 0; chan < rx_channels_count; chan++)
+ stmmac_set_rx_ring_len(priv, priv->ioaddr,
+ (DMA_RX_SIZE - 1), chan);
}
/**
@@ -2363,7 +2302,7 @@ static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
for (queue = 0; queue < tx_queues_count; queue++) {
weight = priv->plat->tx_queues_cfg[queue].weight;
- priv->hw->mac->set_mtl_tx_queue_weight(priv->hw, weight, queue);
+ stmmac_set_mtl_tx_queue_weight(priv, priv->hw, weight, queue);
}
}
@@ -2384,7 +2323,7 @@ static void stmmac_configure_cbs(struct stmmac_priv *priv)
if (mode_to_use == MTL_QUEUE_DCB)
continue;
- priv->hw->mac->config_cbs(priv->hw,
+ stmmac_config_cbs(priv, priv->hw,
priv->plat->tx_queues_cfg[queue].send_slope,
priv->plat->tx_queues_cfg[queue].idle_slope,
priv->plat->tx_queues_cfg[queue].high_credit,
@@ -2406,7 +2345,7 @@ static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
for (queue = 0; queue < rx_queues_count; queue++) {
chan = priv->plat->rx_queues_cfg[queue].chan;
- priv->hw->mac->map_mtl_to_dma(priv->hw, queue, chan);
+ stmmac_map_mtl_to_dma(priv, priv->hw, queue, chan);
}
}
@@ -2426,7 +2365,7 @@ static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
continue;
prio = priv->plat->rx_queues_cfg[queue].prio;
- priv->hw->mac->rx_queue_prio(priv->hw, prio, queue);
+ stmmac_rx_queue_prio(priv, priv->hw, prio, queue);
}
}
@@ -2446,7 +2385,7 @@ static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
continue;
prio = priv->plat->tx_queues_cfg[queue].prio;
- priv->hw->mac->tx_queue_prio(priv->hw, prio, queue);
+ stmmac_tx_queue_prio(priv, priv->hw, prio, queue);
}
}
@@ -2467,7 +2406,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
continue;
packet = priv->plat->rx_queues_cfg[queue].pkt_route;
- priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
+ stmmac_rx_queue_routing(priv, priv->hw, packet, queue);
}
}
@@ -2481,50 +2420,47 @@ static void stmmac_mtl_configuration(struct stmmac_priv *priv)
u32 rx_queues_count = priv->plat->rx_queues_to_use;
u32 tx_queues_count = priv->plat->tx_queues_to_use;
- if (tx_queues_count > 1 && priv->hw->mac->set_mtl_tx_queue_weight)
+ if (tx_queues_count > 1)
stmmac_set_tx_queue_weight(priv);
/* Configure MTL RX algorithms */
- if (rx_queues_count > 1 && priv->hw->mac->prog_mtl_rx_algorithms)
- priv->hw->mac->prog_mtl_rx_algorithms(priv->hw,
- priv->plat->rx_sched_algorithm);
+ if (rx_queues_count > 1)
+ stmmac_prog_mtl_rx_algorithms(priv, priv->hw,
+ priv->plat->rx_sched_algorithm);
/* Configure MTL TX algorithms */
- if (tx_queues_count > 1 && priv->hw->mac->prog_mtl_tx_algorithms)
- priv->hw->mac->prog_mtl_tx_algorithms(priv->hw,
- priv->plat->tx_sched_algorithm);
+ if (tx_queues_count > 1)
+ stmmac_prog_mtl_tx_algorithms(priv, priv->hw,
+ priv->plat->tx_sched_algorithm);
/* Configure CBS in AVB TX queues */
- if (tx_queues_count > 1 && priv->hw->mac->config_cbs)
+ if (tx_queues_count > 1)
stmmac_configure_cbs(priv);
/* Map RX MTL to DMA channels */
- if (priv->hw->mac->map_mtl_to_dma)
- stmmac_rx_queue_dma_chan_map(priv);
+ stmmac_rx_queue_dma_chan_map(priv);
/* Enable MAC RX Queues */
- if (priv->hw->mac->rx_queue_enable)
- stmmac_mac_enable_rx_queues(priv);
+ stmmac_mac_enable_rx_queues(priv);
/* Set RX priorities */
- if (rx_queues_count > 1 && priv->hw->mac->rx_queue_prio)
+ if (rx_queues_count > 1)
stmmac_mac_config_rx_queues_prio(priv);
/* Set TX priorities */
- if (tx_queues_count > 1 && priv->hw->mac->tx_queue_prio)
+ if (tx_queues_count > 1)
stmmac_mac_config_tx_queues_prio(priv);
/* Set RX routing */
- if (rx_queues_count > 1 && priv->hw->mac->rx_queue_routing)
+ if (rx_queues_count > 1)
stmmac_mac_config_rx_queues_routing(priv);
}
static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
{
- if (priv->hw->mac->safety_feat_config && priv->dma_cap.asp) {
+ if (priv->dma_cap.asp) {
netdev_info(priv->dev, "Enabling Safety Features\n");
- priv->hw->mac->safety_feat_config(priv->ioaddr,
- priv->dma_cap.asp);
+ stmmac_safety_feat_config(priv, priv->ioaddr, priv->dma_cap.asp);
} else {
netdev_info(priv->dev, "No Safety Features support found\n");
}
@@ -2559,7 +2495,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
}
/* Copy the MAC addr into the HW */
- priv->hw->mac->set_umac_addr(priv->hw, dev->dev_addr, 0);
+ stmmac_set_umac_addr(priv, priv->hw, dev->dev_addr, 0);
/* PS and related bits will be programmed according to the speed */
if (priv->hw->pcs) {
@@ -2575,7 +2511,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
}
/* Initialize the MAC Core */
- priv->hw->mac->core_init(priv->hw, dev);
+ stmmac_core_init(priv, priv->hw, dev);
/* Initialize MTL*/
if (priv->synopsys_id >= DWMAC_CORE_4_00)
@@ -2585,7 +2521,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
if (priv->synopsys_id >= DWMAC_CORE_5_10)
stmmac_safety_feat_configuration(priv);
- ret = priv->hw->mac->rx_ipc(priv->hw);
+ ret = stmmac_rx_ipc(priv, priv->hw);
if (!ret) {
netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n");
priv->plat->rx_coe = STMMAC_RX_COE_NONE;
@@ -2593,7 +2529,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
}
/* Enable the MAC Rx/Tx */
- priv->hw->mac->set_mac(priv->ioaddr, true);
+ stmmac_mac_set(priv, priv->ioaddr, true);
/* Set the HW DMA mode and the COE */
stmmac_dma_operation_mode(priv);
@@ -2623,13 +2559,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
- if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) {
- priv->rx_riwt = MAX_DMA_RIWT;
- priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
+ if (priv->use_riwt) {
+ ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
+ if (!ret)
+ priv->rx_riwt = MAX_DMA_RIWT;
}
- if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
- priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
+ if (priv->hw->pcs)
+ stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0);
/* set TX and RX rings length */
stmmac_set_rings_length(priv);
@@ -2637,7 +2574,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
/* Enable TSO */
if (priv->tso) {
for (chan = 0; chan < tx_cnt; chan++)
- priv->hw->dma->enable_tso(priv->ioaddr, 1, chan);
+ stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
}
return 0;
@@ -2808,7 +2745,7 @@ static int stmmac_release(struct net_device *dev)
free_dma_desc_resources(priv);
/* Disable the MAC Rx/Tx */
- priv->hw->mac->set_mac(priv->ioaddr, false);
+ stmmac_mac_set(priv, priv->ioaddr, false);
netif_carrier_off(dev);
@@ -2851,10 +2788,10 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
TSO_MAX_BUFF_SIZE : tmp_len;
- priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size,
- 0, 1,
- (last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
- 0, 0);
+ stmmac_prepare_tso_tx_desc(priv, desc, 0, buff_size,
+ 0, 1,
+ (last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
+ 0, 0);
tmp_len -= TSO_MAX_BUFF_SIZE;
}
@@ -2926,7 +2863,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
/* set new MSS value if needed */
if (mss != tx_q->mss) {
mss_desc = tx_q->dma_tx + tx_q->cur_tx;
- priv->hw->desc->set_mss(mss_desc, mss);
+ stmmac_set_mss(priv, mss_desc, mss);
tx_q->mss = mss;
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
@@ -3012,7 +2949,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
STMMAC_COAL_TIMER(priv->tx_coal_timer));
} else {
priv->tx_count_frames = 0;
- priv->hw->desc->set_tx_ic(desc);
+ stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
}
@@ -3022,11 +2959,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
priv->hwts_tx_en)) {
/* declare that device is doing timestamping */
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- priv->hw->desc->enable_tx_timestamp(first);
+ stmmac_enable_tx_timestamp(priv, first);
}
/* Complete the first descriptor before granting the DMA */
- priv->hw->desc->prepare_tso_tx_desc(first, 1,
+ stmmac_prepare_tso_tx_desc(priv, first, 1,
proto_hdr_len,
pay_len,
1, tx_q->tx_skbuff_dma[first_entry].last_segment,
@@ -3040,7 +2977,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
* sure that MSS's own bit is the last thing written.
*/
dma_wmb();
- priv->hw->desc->set_tx_owner(mss_desc);
+ stmmac_set_tx_owner(priv, mss_desc);
}
/* The own bit must be the latest setting done when prepare the
@@ -3054,8 +2991,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
__func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
tx_q->cur_tx, first, nfrags);
- priv->hw->desc->display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE,
- 0);
+ stmmac_display_ring(priv, (void *)tx_q->dma_tx, DMA_TX_SIZE, 0);
pr_info(">>> frame to be transmitted: ");
print_pkt(skb->data, skb_headlen(skb));
@@ -3063,8 +2999,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
- priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
- queue);
+ stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
return NETDEV_TX_OK;
@@ -3136,11 +3071,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
enh_desc = priv->plat->enh_desc;
/* To program the descriptors according to the size of the frame */
if (enh_desc)
- is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc);
+ is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc);
if (unlikely(is_jumbo) && likely(priv->synopsys_id <
DWMAC_CORE_4_00)) {
- entry = priv->hw->mode->jumbo_frm(tx_q, skb, csum_insertion);
+ entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion);
if (unlikely(entry < 0))
goto dma_map_err;
}
@@ -3174,9 +3109,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
/* Prepare the descriptor and set the own bit too */
- priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion,
- priv->mode, 1, last_segment,
- skb->len);
+ stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion,
+ priv->mode, 1, last_segment, skb->len);
}
/* Only the last descriptor gets to point to the skb. */
@@ -3203,7 +3137,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
else
tx_head = (void *)tx_q->dma_tx;
- priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false);
+ stmmac_display_ring(priv, tx_head, DMA_TX_SIZE, false);
netdev_dbg(priv->dev, ">>> frame to be transmitted: ");
print_pkt(skb->data, skb->len);
@@ -3228,7 +3162,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
STMMAC_COAL_TIMER(priv->tx_coal_timer));
} else {
priv->tx_count_frames = 0;
- priv->hw->desc->set_tx_ic(desc);
+ stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
}
@@ -3259,13 +3193,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
priv->hwts_tx_en)) {
/* declare that device is doing timestamping */
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- priv->hw->desc->enable_tx_timestamp(first);
+ stmmac_enable_tx_timestamp(priv, first);
}
/* Prepare the first descriptor setting the OWN bit too */
- priv->hw->desc->prepare_tx_desc(first, 1, nopaged_len,
- csum_insertion, priv->mode, 1,
- last_segment, skb->len);
+ stmmac_prepare_tx_desc(priv, first, 1, nopaged_len,
+ csum_insertion, priv->mode, 1, last_segment,
+ skb->len);
/* The own bit must be the latest setting done when prepare the
* descriptor and then barrier is needed to make sure that
@@ -3277,10 +3211,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
if (priv->synopsys_id < DWMAC_CORE_4_00)
- priv->hw->dma->enable_dma_transmission(priv->ioaddr);
+ stmmac_enable_dma_transmission(priv, priv->ioaddr);
else
- priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
- queue);
+ stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
+ queue);
return NETDEV_TX_OK;
@@ -3370,8 +3304,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
} else {
p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
}
- if (priv->hw->mode->refill_desc3)
- priv->hw->mode->refill_desc3(rx_q, p);
+
+ stmmac_refill_desc3(priv, rx_q, p);
if (rx_q->rx_zeroc_thresh > 0)
rx_q->rx_zeroc_thresh--;
@@ -3382,9 +3316,9 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
dma_wmb();
if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
- priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0);
+ stmmac_init_rx_desc(priv, p, priv->use_riwt, 0, 0);
else
- priv->hw->desc->set_rx_owner(p);
+ stmmac_set_rx_owner(priv, p);
dma_wmb();
@@ -3418,7 +3352,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
else
rx_head = (void *)rx_q->dma_rx;
- priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true);
+ stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
}
while (count < limit) {
int status;
@@ -3431,8 +3365,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
p = rx_q->dma_rx + entry;
/* read the status of the incoming frame */
- status = priv->hw->desc->rx_status(&priv->dev->stats,
- &priv->xstats, p);
+ status = stmmac_rx_status(priv, &priv->dev->stats,
+ &priv->xstats, p);
/* check if managed by the DMA otherwise go ahead */
if (unlikely(status & dma_own))
break;
@@ -3449,11 +3383,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
prefetch(np);
- if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status))
- priv->hw->desc->rx_extended_status(&priv->dev->stats,
- &priv->xstats,
- rx_q->dma_erx +
- entry);
+ if (priv->extend_desc)
+ stmmac_rx_extended_status(priv, &priv->dev->stats,
+ &priv->xstats, rx_q->dma_erx + entry);
if (unlikely(status == discard_frame)) {
priv->dev->stats.rx_errors++;
if (priv->hwts_rx_en && !priv->extend_desc) {
@@ -3479,7 +3411,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
else
des = le32_to_cpu(p->des2);
- frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
+ frame_len = stmmac_get_rx_frame_len(priv, p, coe);
/* If frame length is greater than skb buffer size
* (preallocated during init) then the packet is
@@ -3621,7 +3553,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
work_done = stmmac_rx(priv, budget, rx_q->queue_index);
if (work_done < budget) {
napi_complete_done(napi, work_done);
- stmmac_enable_dma_irq(priv, chan);
+ stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
}
return work_done;
}
@@ -3654,7 +3586,7 @@ static void stmmac_set_rx_mode(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
- priv->hw->mac->set_filter(priv->hw, dev);
+ stmmac_set_filter(priv, priv->hw, dev);
}
/**
@@ -3727,7 +3659,7 @@ static int stmmac_set_features(struct net_device *netdev,
/* No check needed because rx_coe has been set before and it will be
* fixed in case of issue.
*/
- priv->hw->mac->rx_ipc(priv->hw);
+ stmmac_rx_ipc(priv, priv->hw);
return 0;
}
@@ -3771,8 +3703,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
/* To handle GMAC own interrupts */
if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
- int status = priv->hw->mac->host_irq_status(priv->hw,
- &priv->xstats);
+ int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
if (unlikely(status)) {
/* For LPI we need to save the tx status */
@@ -3787,15 +3718,14 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
struct stmmac_rx_queue *rx_q =
&priv->rx_queue[queue];
- status |=
- priv->hw->mac->host_mtl_irq_status(priv->hw,
- queue);
+ status |= stmmac_host_mtl_irq_status(priv,
+ priv->hw, queue);
- if (status & CORE_IRQ_MTL_RX_OVERFLOW &&
- priv->hw->dma->set_rx_tail_ptr)
- priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
- rx_q->rx_tail_addr,
- queue);
+ if (status & CORE_IRQ_MTL_RX_OVERFLOW)
+ stmmac_set_rx_tail_ptr(priv,
+ priv->ioaddr,
+ rx_q->rx_tail_addr,
+ queue);
}
}
@@ -3869,7 +3799,7 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
if (ret)
return ret;
- priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0);
+ stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0);
return ret;
}
@@ -4145,49 +4075,17 @@ static void stmmac_service_task(struct work_struct *work)
*/
static int stmmac_hw_init(struct stmmac_priv *priv)
{
- struct mac_device_info *mac;
-
- /* Identify the MAC HW device */
- if (priv->plat->setup) {
- mac = priv->plat->setup(priv);
- } else if (priv->plat->has_gmac) {
- priv->dev->priv_flags |= IFF_UNICAST_FLT;
- mac = dwmac1000_setup(priv->ioaddr,
- priv->plat->multicast_filter_bins,
- priv->plat->unicast_filter_entries,
- &priv->synopsys_id);
- } else if (priv->plat->has_gmac4) {
- priv->dev->priv_flags |= IFF_UNICAST_FLT;
- mac = dwmac4_setup(priv->ioaddr,
- priv->plat->multicast_filter_bins,
- priv->plat->unicast_filter_entries,
- &priv->synopsys_id);
- } else {
- mac = dwmac100_setup(priv->ioaddr, &priv->synopsys_id);
- }
- if (!mac)
- return -ENOMEM;
-
- priv->hw = mac;
+ int ret;
/* dwmac-sun8i only work in chain mode */
if (priv->plat->has_sun8i)
chain_mode = 1;
+ priv->chain_mode = chain_mode;
- /* To use the chained or ring mode */
- if (priv->synopsys_id >= DWMAC_CORE_4_00) {
- priv->hw->mode = &dwmac4_ring_mode_ops;
- } else {
- if (chain_mode) {
- priv->hw->mode = &chain_mode_ops;
- dev_info(priv->device, "Chain mode enabled\n");
- priv->mode = STMMAC_CHAIN_MODE;
- } else {
- priv->hw->mode = &ring_mode_ops;
- dev_info(priv->device, "Ring mode enabled\n");
- priv->mode = STMMAC_RING_MODE;
- }
- }
+ /* Initialize HW Interface */
+ ret = stmmac_hwif_init(priv);
+ if (ret)
+ return ret;
/* Get the HW capability (new GMAC newer than 3.50a) */
priv->hw_cap_support = stmmac_get_hw_features(priv);
@@ -4221,12 +4119,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
dev_info(priv->device, "No HW DMA feature register supported\n");
}
- /* To use alternate (extended), normal or GMAC4 descriptor structures */
- if (priv->synopsys_id >= DWMAC_CORE_4_00)
- priv->hw->desc = &dwmac4_desc_ops;
- else
- stmmac_selec_desc_mode(priv);
-
if (priv->plat->rx_coe) {
priv->hw->rx_csum = priv->plat->rx_coe;
dev_info(priv->device, "RX Checksum Offload Engine supported\n");
@@ -4458,7 +4350,7 @@ int stmmac_dvr_remove(struct device *dev)
stmmac_stop_all_dma(priv);
- priv->hw->mac->set_mac(priv->ioaddr, false);
+ stmmac_mac_set(priv, priv->ioaddr, false);
netif_carrier_off(ndev);
unregister_netdev(ndev);
if (priv->plat->stmmac_rst)
@@ -4507,10 +4399,10 @@ int stmmac_suspend(struct device *dev)
/* Enable Power down mode by programming the PMT regs */
if (device_may_wakeup(priv->device)) {
- priv->hw->mac->pmt(priv->hw, priv->wolopts);
+ stmmac_pmt(priv, priv->hw, priv->wolopts);
priv->irq_wake = 1;
} else {
- priv->hw->mac->set_mac(priv->ioaddr, false);
+ stmmac_mac_set(priv, priv->ioaddr, false);
pinctrl_pm_select_sleep_state(priv->device);
/* Disable clock in case of PWM is off */
clk_disable(priv->plat->pclk);
@@ -4574,7 +4466,7 @@ int stmmac_resume(struct device *dev)
*/
if (device_may_wakeup(priv->device)) {
spin_lock_irqsave(&priv->lock, flags);
- priv->hw->mac->pmt(priv->hw, 0);
+ stmmac_pmt(priv, priv->hw, 0);
spin_unlock_irqrestore(&priv->lock, flags);
priv->irq_wake = 0;
} else {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index e471a903c654..7d3a5c7f5db6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -49,9 +49,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb)
addend = neg_adj ? (addend - diff) : (addend + diff);
spin_lock_irqsave(&priv->ptp_lock, flags);
-
- priv->hw->ptp->config_addend(priv->ptpaddr, addend);
-
+ stmmac_config_addend(priv, priv->ptpaddr, addend);
spin_unlock_irqrestore(&priv->ptp_lock, flags);
return 0;
@@ -84,10 +82,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta)
nsec = reminder;
spin_lock_irqsave(&priv->ptp_lock, flags);
-
- priv->hw->ptp->adjust_systime(priv->ptpaddr, sec, nsec, neg_adj,
- priv->plat->has_gmac4);
-
+ stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj,
+ priv->plat->has_gmac4);
spin_unlock_irqrestore(&priv->ptp_lock, flags);
return 0;
@@ -110,9 +106,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
u64 ns;
spin_lock_irqsave(&priv->ptp_lock, flags);
-
- ns = priv->hw->ptp->get_systime(priv->ptpaddr);
-
+ stmmac_get_systime(priv, priv->ptpaddr, &ns);
spin_unlock_irqrestore(&priv->ptp_lock, flags);
*ts = ns_to_timespec64(ns);
@@ -137,9 +131,7 @@ static int stmmac_set_time(struct ptp_clock_info *ptp,
unsigned long flags;
spin_lock_irqsave(&priv->ptp_lock, flags);
-
- priv->hw->ptp->init_systime(priv->ptpaddr, ts->tv_sec, ts->tv_nsec);
-
+ stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec);
spin_unlock_irqrestore(&priv->ptp_lock, flags);
return 0;
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index 8900a6fad318..c4ffdf47bad5 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -33,6 +33,8 @@
#define SGMII_LINK_MAC_MAC_FORCED 2
#define SGMII_LINK_MAC_FIBER 3
#define SGMII_LINK_MAC_PHY_NO_MDIO 4
+#define RGMII_LINK_MAC_PHY 5
+#define RGMII_LINK_MAC_PHY_NO_MDIO 7
#define XGMII_LINK_MAC_PHY 10
#define XGMII_LINK_MAC_MAC_FORCED 11
@@ -212,6 +214,7 @@ struct netcp_module {
int (*add_vid)(void *intf_priv, int vid);
int (*del_vid)(void *intf_priv, int vid);
int (*ioctl)(void *intf_priv, struct ifreq *req, int cmd);
+ int (*set_rx_mode)(void *intf_priv, bool promisc);
/* used internally */
struct list_head module_list;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index f5a7eb22d0f5..e40aa3e31af2 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1509,6 +1509,24 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp)
}
}
+static int netcp_set_promiscuous(struct netcp_intf *netcp, bool promisc)
+{
+ struct netcp_intf_modpriv *priv;
+ struct netcp_module *module;
+ int error;
+
+ for_each_module(netcp, priv) {
+ module = priv->netcp_module;
+ if (!module->set_rx_mode)
+ continue;
+
+ error = module->set_rx_mode(priv->module_priv, promisc);
+ if (error)
+ return error;
+ }
+ return 0;
+}
+
static void netcp_set_rx_mode(struct net_device *ndev)
{
struct netcp_intf *netcp = netdev_priv(ndev);
@@ -1538,6 +1556,7 @@ static void netcp_set_rx_mode(struct net_device *ndev)
/* finally sweep and callout into modules */
netcp_addr_sweep_del(netcp);
netcp_addr_sweep_add(netcp);
+ netcp_set_promiscuous(netcp, promisc);
spin_unlock(&netcp->lock);
}
@@ -2155,8 +2174,13 @@ static int netcp_probe(struct platform_device *pdev)
struct device_node *child, *interfaces;
struct netcp_device *netcp_device;
struct device *dev = &pdev->dev;
+ struct netcp_module *module;
int ret;
+ if (!knav_dma_device_ready() ||
+ !knav_qmss_device_ready())
+ return -EPROBE_DEFER;
+
if (!node) {
dev_err(dev, "could not find device info\n");
return -ENODEV;
@@ -2203,6 +2227,14 @@ static int netcp_probe(struct platform_device *pdev)
/* Add the device instance to the list */
list_add_tail(&netcp_device->device_list, &netcp_devices);
+ /* Probe & attach any modules already registered */
+ mutex_lock(&netcp_modules_lock);
+ for_each_netcp_module(module) {
+ ret = netcp_module_probe(netcp_device, module);
+ if (ret < 0)
+ dev_err(dev, "module(%s) probe failed\n", module->name);
+ }
+ mutex_unlock(&netcp_modules_lock);
return 0;
probe_quit_interface:
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 56dbc0b9fedc..6a728d35e776 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -21,6 +21,7 @@
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include <linux/of_address.h>
#include <linux/if_vlan.h>
#include <linux/ptp_classify.h>
@@ -42,7 +43,7 @@
/* 1G Ethernet SS defines */
#define GBE_MODULE_NAME "netcp-gbe"
-#define GBE_SS_VERSION_14 0x4ed21104
+#define GBE_SS_VERSION_14 0x4ed2
#define GBE_SS_REG_INDEX 0
#define GBE_SGMII34_REG_INDEX 1
@@ -72,6 +73,11 @@
#define IS_SS_ID_NU(d) \
(GBE_IDENT((d)->ss_version) == GBE_SS_ID_NU)
+#define IS_SS_ID_VER_14(d) \
+ (GBE_IDENT((d)->ss_version) == GBE_SS_VERSION_14)
+#define IS_SS_ID_2U(d) \
+ (GBE_IDENT((d)->ss_version) == GBE_SS_ID_2U)
+
#define GBENU_SS_REG_INDEX 0
#define GBENU_SM_REG_INDEX 1
#define GBENU_SGMII_MODULE_OFFSET 0x100
@@ -86,7 +92,7 @@
/* 10G Ethernet SS defines */
#define XGBE_MODULE_NAME "netcp-xgbe"
-#define XGBE_SS_VERSION_10 0x4ee42100
+#define XGBE_SS_VERSION_10 0x4ee4
#define XGBE_SS_REG_INDEX 0
#define XGBE_SM_REG_INDEX 1
@@ -166,6 +172,11 @@
#define GBE_RXHOOK_ORDER 0
#define GBE_DEFAULT_ALE_AGEOUT 30
#define SLAVE_LINK_IS_XGMII(s) ((s)->link_interface >= XGMII_LINK_MAC_PHY)
+#define SLAVE_LINK_IS_RGMII(s) \
+ (((s)->link_interface >= RGMII_LINK_MAC_PHY) && \
+ ((s)->link_interface <= RGMII_LINK_MAC_PHY_NO_MDIO))
+#define SLAVE_LINK_IS_SGMII(s) \
+ ((s)->link_interface <= SGMII_LINK_MAC_PHY_NO_MDIO)
#define NETCP_LINK_STATE_INVALID -1
#define GBE_SET_REG_OFS(p, rb, rn) p->rb##_ofs.rn = \
@@ -549,6 +560,7 @@ struct gbe_ss_regs {
struct gbe_ss_regs_ofs {
u16 id_ver;
u16 control;
+ u16 rgmii_status; /* 2U */
};
struct gbe_switch_regs {
@@ -591,6 +603,7 @@ struct gbe_port_regs {
struct gbe_port_regs_ofs {
u16 port_vlan;
u16 tx_pri_map;
+ u16 rx_pri_map;
u16 sa_lo;
u16 sa_hi;
u16 ts_ctl;
@@ -695,6 +708,7 @@ struct gbe_slave {
u32 link_interface;
u32 mac_control;
u8 phy_port_t;
+ struct device_node *node;
struct device_node *phy_node;
struct ts_ctl ts_ctl;
struct list_head slave_list;
@@ -1915,7 +1929,7 @@ static void keystone_get_ethtool_stats(struct net_device *ndev,
gbe_dev = gbe_intf->gbe_dev;
spin_lock_bh(&gbe_dev->hw_stats_lock);
- if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+ if (IS_SS_ID_VER_14(gbe_dev))
gbe_update_stats_ver14(gbe_dev, data);
else
gbe_update_stats(gbe_dev, data);
@@ -2091,8 +2105,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev,
ALE_PORT_STATE_FORWARD);
if (ndev && slave->open &&
- slave->link_interface != SGMII_LINK_MAC_PHY &&
- slave->link_interface != XGMII_LINK_MAC_PHY)
+ ((slave->link_interface != SGMII_LINK_MAC_PHY) &&
+ (slave->link_interface != RGMII_LINK_MAC_PHY) &&
+ (slave->link_interface != XGMII_LINK_MAC_PHY)))
netif_carrier_on(ndev);
} else {
writel(mac_control, GBE_REG_ADDR(slave, emac_regs,
@@ -2101,8 +2116,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev,
ALE_PORT_STATE,
ALE_PORT_STATE_DISABLE);
if (ndev &&
- slave->link_interface != SGMII_LINK_MAC_PHY &&
- slave->link_interface != XGMII_LINK_MAC_PHY)
+ ((slave->link_interface != SGMII_LINK_MAC_PHY) &&
+ (slave->link_interface != RGMII_LINK_MAC_PHY) &&
+ (slave->link_interface != XGMII_LINK_MAC_PHY)))
netif_carrier_off(ndev);
}
@@ -2115,23 +2131,35 @@ static bool gbe_phy_link_status(struct gbe_slave *slave)
return !slave->phy || slave->phy->link;
}
+#define RGMII_REG_STATUS_LINK BIT(0)
+
+static void netcp_2u_rgmii_get_port_link(struct gbe_priv *gbe_dev, bool *status)
+{
+ u32 val = 0;
+
+ val = readl(GBE_REG_ADDR(gbe_dev, ss_regs, rgmii_status));
+ *status = !!(val & RGMII_REG_STATUS_LINK);
+}
+
static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev,
struct gbe_slave *slave,
struct net_device *ndev)
{
- int sp = slave->slave_num;
- int phy_link_state, sgmii_link_state = 1, link_state;
+ bool sw_link_state = true, phy_link_state;
+ int sp = slave->slave_num, link_state;
if (!slave->open)
return;
- if (!SLAVE_LINK_IS_XGMII(slave)) {
- sgmii_link_state =
- netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp);
- }
+ if (SLAVE_LINK_IS_RGMII(slave))
+ netcp_2u_rgmii_get_port_link(gbe_dev,
+ &sw_link_state);
+ if (SLAVE_LINK_IS_SGMII(slave))
+ sw_link_state =
+ netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp);
phy_link_state = gbe_phy_link_status(slave);
- link_state = phy_link_state & sgmii_link_state;
+ link_state = phy_link_state & sw_link_state;
if (atomic_xchg(&slave->link_state, link_state) != link_state)
netcp_ethss_link_state_action(gbe_dev, ndev, slave,
@@ -2205,7 +2233,7 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
max_rx_len = NETCP_MAX_FRAME_SIZE;
/* Enable correct MII mode at SS level */
- if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) &&
+ if (IS_SS_ID_XGBE(gbe_dev) &&
(slave->link_interface >= XGMII_LINK_MAC_PHY)) {
xgmii_mode = readl(GBE_REG_ADDR(gbe_dev, ss_regs, control));
xgmii_mode |= (1 << slave->slave_num);
@@ -2236,7 +2264,8 @@ static void gbe_slave_stop(struct gbe_intf *intf)
struct gbe_priv *gbe_dev = intf->gbe_dev;
struct gbe_slave *slave = intf->slave;
- gbe_sgmii_rtreset(gbe_dev, slave, true);
+ if (!IS_SS_ID_2U(gbe_dev))
+ gbe_sgmii_rtreset(gbe_dev, slave, true);
gbe_port_reset(slave);
/* Disable forwarding */
cpsw_ale_control_set(gbe_dev->ale, slave->port_num,
@@ -2271,11 +2300,20 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
void (*hndlr)(struct net_device *) = gbe_adjust_link;
- gbe_sgmii_config(priv, slave);
+ if (!IS_SS_ID_2U(priv))
+ gbe_sgmii_config(priv, slave);
gbe_port_reset(slave);
- gbe_sgmii_rtreset(priv, slave, false);
+ if (!IS_SS_ID_2U(priv))
+ gbe_sgmii_rtreset(priv, slave, false);
gbe_port_config(priv, slave, priv->rx_packet_max);
gbe_set_slave_mac(slave, gbe_intf);
+ /* For NU & 2U switch, map the vlan priorities to zero
+ * as we only configure to use priority 0
+ */
+ if (IS_SS_ID_MU(priv))
+ writel(HOST_TX_PRI_MAP_DEFAULT,
+ GBE_REG_ADDR(slave, port_regs, rx_pri_map));
+
/* enable forwarding */
cpsw_ale_control_set(priv->ale, slave->port_num,
ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
@@ -2286,6 +2324,21 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
has_phy = true;
phy_mode = PHY_INTERFACE_MODE_SGMII;
slave->phy_port_t = PORT_MII;
+ } else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
+ has_phy = true;
+ phy_mode = of_get_phy_mode(slave->node);
+ /* if phy-mode is not present, default to
+ * PHY_INTERFACE_MODE_RGMII
+ */
+ if (phy_mode < 0)
+ phy_mode = PHY_INTERFACE_MODE_RGMII;
+
+ if (!phy_interface_mode_is_rgmii(phy_mode)) {
+ dev_err(priv->dev,
+ "Unsupported phy mode %d\n", phy_mode);
+ return -EINVAL;
+ }
+ slave->phy_port_t = PORT_MII;
} else if (slave->link_interface == XGMII_LINK_MAC_PHY) {
has_phy = true;
phy_mode = PHY_INTERFACE_MODE_NA;
@@ -2293,7 +2346,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf)
}
if (has_phy) {
- if (priv->ss_version == XGBE_SS_VERSION_10)
+ if (IS_SS_ID_XGBE(priv))
hndlr = xgbe_adjust_link;
slave->phy = of_phy_connect(gbe_intf->ndev,
@@ -2722,6 +2775,61 @@ static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req)
}
#endif /* CONFIG_TI_CPTS */
+static int gbe_set_rx_mode(void *intf_priv, bool promisc)
+{
+ struct gbe_intf *gbe_intf = intf_priv;
+ struct gbe_priv *gbe_dev = gbe_intf->gbe_dev;
+ struct cpsw_ale *ale = gbe_dev->ale;
+ unsigned long timeout;
+ int i, ret = -ETIMEDOUT;
+
+ /* Disable(1)/Enable(0) Learn for all ports (host is port 0 and
+ * slaves are port 1 and up
+ */
+ for (i = 0; i <= gbe_dev->num_slaves; i++) {
+ cpsw_ale_control_set(ale, i,
+ ALE_PORT_NOLEARN, !!promisc);
+ cpsw_ale_control_set(ale, i,
+ ALE_PORT_NO_SA_UPDATE, !!promisc);
+ }
+
+ if (!promisc) {
+ /* Don't Flood All Unicast Packets to Host port */
+ cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0);
+ dev_vdbg(gbe_dev->dev, "promiscuous mode disabled\n");
+ return 0;
+ }
+
+ timeout = jiffies + HZ;
+
+ /* Clear All Untouched entries */
+ cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1);
+ do {
+ cpu_relax();
+ if (cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) {
+ ret = 0;
+ break;
+ }
+
+ } while (time_after(timeout, jiffies));
+
+ /* Make sure it is not a false timeout */
+ if (ret && !cpsw_ale_control_get(ale, 0, ALE_AGEOUT))
+ return ret;
+
+ cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1);
+
+ /* Clear all mcast from ALE */
+ cpsw_ale_flush_multicast(ale,
+ GBE_PORT_MASK(gbe_dev->ale_ports),
+ -1);
+
+ /* Flood All Unicast Packets to Host port */
+ cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1);
+ dev_vdbg(gbe_dev->dev, "promiscuous mode enabled\n");
+ return ret;
+}
+
static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd)
{
struct gbe_intf *gbe_intf = intf_priv;
@@ -2764,7 +2872,7 @@ static void netcp_ethss_timer(struct timer_list *t)
/* A timer runs as a BH, no need to block them */
spin_lock(&gbe_dev->hw_stats_lock);
- if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+ if (IS_SS_ID_VER_14(gbe_dev))
gbe_update_stats_ver14(gbe_dev, NULL);
else
gbe_update_stats(gbe_dev, NULL);
@@ -2807,7 +2915,7 @@ static int gbe_open(void *intf_priv, struct net_device *ndev)
GBE_RTL_VERSION(reg), GBE_IDENT(reg));
/* For 10G and on NetCP 1.5, use directed to port */
- if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) || IS_SS_ID_MU(gbe_dev))
+ if (IS_SS_ID_XGBE(gbe_dev) || IS_SS_ID_MU(gbe_dev))
gbe_intf->tx_pipe.flags = SWITCH_TO_PORT_IN_TAGINFO;
if (gbe_dev->enable_ale)
@@ -2911,8 +3019,10 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
slave->link_interface = SGMII_LINK_MAC_PHY;
}
+ slave->node = node;
slave->open = false;
if ((slave->link_interface == SGMII_LINK_MAC_PHY) ||
+ (slave->link_interface == RGMII_LINK_MAC_PHY) ||
(slave->link_interface == XGMII_LINK_MAC_PHY))
slave->phy_node = of_parse_phandle(node, "phy-handle", 0);
slave->port_num = gbe_get_slave_port(gbe_dev, slave->slave_num);
@@ -2924,7 +3034,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
/* Emac regs memmap are contiguous but port regs are not */
port_reg_num = slave->slave_num;
- if (gbe_dev->ss_version == GBE_SS_VERSION_14) {
+ if (IS_SS_ID_VER_14(gbe_dev)) {
if (slave->slave_num > 1) {
port_reg_ofs = GBE13_SLAVE_PORT2_OFFSET;
port_reg_num -= 2;
@@ -2939,7 +3049,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
emac_reg_ofs = GBENU_EMAC_OFFSET;
port_reg_blk_sz = 0x1000;
emac_reg_blk_sz = 0x1000;
- } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) {
+ } else if (IS_SS_ID_XGBE(gbe_dev)) {
port_reg_ofs = XGBE10_SLAVE_PORT_OFFSET;
emac_reg_ofs = XGBE10_EMAC_OFFSET;
port_reg_blk_sz = 0x30;
@@ -2955,7 +3065,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
slave->emac_regs = gbe_dev->switch_regs + emac_reg_ofs +
(emac_reg_blk_sz * slave->slave_num);
- if (gbe_dev->ss_version == GBE_SS_VERSION_14) {
+ if (IS_SS_ID_VER_14(gbe_dev)) {
/* Initialize slave port register offsets */
GBE_SET_REG_OFS(slave, port_regs, port_vlan);
GBE_SET_REG_OFS(slave, port_regs, tx_pri_map);
@@ -2976,6 +3086,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
/* Initialize slave port register offsets */
GBENU_SET_REG_OFS(slave, port_regs, port_vlan);
GBENU_SET_REG_OFS(slave, port_regs, tx_pri_map);
+ GBENU_SET_REG_OFS(slave, port_regs, rx_pri_map);
GBENU_SET_REG_OFS(slave, port_regs, sa_lo);
GBENU_SET_REG_OFS(slave, port_regs, sa_hi);
GBENU_SET_REG_OFS(slave, port_regs, ts_ctl);
@@ -2989,7 +3100,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
GBENU_SET_REG_OFS(slave, emac_regs, mac_control);
GBENU_SET_REG_OFS(slave, emac_regs, soft_reset);
- } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) {
+ } else if (IS_SS_ID_XGBE(gbe_dev)) {
/* Initialize slave port register offsets */
XGBE_SET_REG_OFS(slave, port_regs, port_vlan);
XGBE_SET_REG_OFS(slave, port_regs, tx_pri_map);
@@ -3039,7 +3150,8 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
continue;
}
- gbe_sgmii_config(gbe_dev, slave);
+ if (!IS_SS_ID_2U(gbe_dev))
+ gbe_sgmii_config(gbe_dev, slave);
gbe_port_reset(slave);
gbe_port_config(gbe_dev, slave, gbe_dev->rx_packet_max);
list_add_tail(&slave->slave_list, &gbe_dev->secondary_slaves);
@@ -3073,6 +3185,9 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
if (slave->link_interface == SGMII_LINK_MAC_PHY) {
phy_mode = PHY_INTERFACE_MODE_SGMII;
slave->phy_port_t = PORT_MII;
+ } else if (slave->link_interface == RGMII_LINK_MAC_PHY) {
+ phy_mode = PHY_INTERFACE_MODE_RGMII;
+ slave->phy_port_t = PORT_MII;
} else {
phy_mode = PHY_INTERFACE_MODE_NA;
slave->phy_port_t = PORT_FIBRE;
@@ -3080,6 +3195,7 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
for_each_sec_slave(slave, gbe_dev) {
if ((slave->link_interface != SGMII_LINK_MAC_PHY) &&
+ (slave->link_interface != RGMII_LINK_MAC_PHY) &&
(slave->link_interface != XGMII_LINK_MAC_PHY))
continue;
slave->phy =
@@ -3355,7 +3471,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
gbe_dev->num_stats_mods = gbe_dev->max_num_ports;
gbe_dev->et_stats = gbenu_et_stats;
- if (IS_SS_ID_NU(gbe_dev))
+ if (IS_SS_ID_MU(gbe_dev))
gbe_dev->num_et_stats = GBENU_ET_STATS_HOST_SIZE +
(gbe_dev->max_num_slaves * GBENU_ET_STATS_PORT_SIZE);
else
@@ -3396,7 +3512,9 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
}
gbe_dev->switch_regs = regs;
- gbe_dev->sgmii_port_regs = gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET;
+ if (!IS_SS_ID_2U(gbe_dev))
+ gbe_dev->sgmii_port_regs =
+ gbe_dev->ss_regs + GBENU_SGMII_MODULE_OFFSET;
/* Although sgmii modules are mem mapped to one contiguous
* region on GBENU devices, setting sgmii_port34_regs allows
@@ -3419,6 +3537,8 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
/* Subsystem registers */
GBENU_SET_REG_OFS(gbe_dev, ss_regs, id_ver);
+ /* ok to set for MU, but used by 2U only */
+ GBENU_SET_REG_OFS(gbe_dev, ss_regs, rgmii_status);
/* Switch module registers */
GBENU_SET_REG_OFS(gbe_dev, switch_regs, id_ver);
@@ -3464,6 +3584,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
gbe_dev->max_num_slaves = 8;
} else if (of_device_is_compatible(node, "ti,netcp-gbe-2")) {
gbe_dev->max_num_slaves = 1;
+ gbe_module.set_rx_mode = gbe_set_rx_mode;
} else if (of_device_is_compatible(node, "ti,netcp-xgbe")) {
gbe_dev->max_num_slaves = 2;
} else {
@@ -3508,7 +3629,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
dev_dbg(dev, "ss_version: 0x%08x\n", gbe_dev->ss_version);
- if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+ if (IS_SS_ID_VER_14(gbe_dev))
ret = set_gbe_ethss14_priv(gbe_dev, node);
else if (IS_SS_ID_MU(gbe_dev))
ret = set_gbenu_ethss_priv(gbe_dev, node);
@@ -3606,7 +3727,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
spin_lock_bh(&gbe_dev->hw_stats_lock);
for (i = 0; i < gbe_dev->num_stats_mods; i++) {
- if (gbe_dev->ss_version == GBE_SS_VERSION_14)
+ if (IS_SS_ID_VER_14(gbe_dev))
gbe_reset_mod_stats_ver14(gbe_dev, i);
else
gbe_reset_mod_stats(gbe_dev, i);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89a9b93..750eaa53bf0c 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -36,6 +36,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
#define GENEVE_VER 0
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
+#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
+#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
/* per-network namespace private data for this module */
struct geneve_net {
@@ -826,8 +828,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return PTR_ERR(rt);
if (skb_dst(skb)) {
- int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
- GENEVE_BASE_HLEN - info->options_len - 14;
+ int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
+ info->options_len;
skb_dst_update_pmtu(skb, mtu);
}
@@ -872,8 +874,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return PTR_ERR(dst);
if (skb_dst(skb)) {
- int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
- GENEVE_BASE_HLEN - info->options_len - 14;
+ int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
skb_dst_update_pmtu(skb, mtu);
}
@@ -941,11 +942,10 @@ tx_error:
static int geneve_change_mtu(struct net_device *dev, int new_mtu)
{
- /* Only possible if called internally, ndo_change_mtu path's new_mtu
- * is guaranteed to be between dev->min_mtu and dev->max_mtu.
- */
if (new_mtu > dev->max_mtu)
new_mtu = dev->max_mtu;
+ else if (new_mtu < dev->min_mtu)
+ new_mtu = dev->min_mtu;
dev->mtu = new_mtu;
return 0;
@@ -1261,7 +1261,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
}
if (data[IFLA_GENEVE_REMOTE6]) {
- #if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
attrtype = IFLA_GENEVE_REMOTE6;
goto change_notsup;
@@ -1387,6 +1387,48 @@ change_notsup:
return -EOPNOTSUPP;
}
+static void geneve_link_config(struct net_device *dev,
+ struct ip_tunnel_info *info, struct nlattr *tb[])
+{
+ struct geneve_dev *geneve = netdev_priv(dev);
+ int ldev_mtu = 0;
+
+ if (tb[IFLA_MTU]) {
+ geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+ return;
+ }
+
+ switch (ip_tunnel_info_af(info)) {
+ case AF_INET: {
+ struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
+ struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
+
+ if (!IS_ERR(rt) && rt->dst.dev) {
+ ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
+ ip_rt_put(rt);
+ }
+ break;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6: {
+ struct rt6_info *rt = rt6_lookup(geneve->net,
+ &info->key.u.ipv6.dst, NULL, 0,
+ NULL, 0);
+
+ if (rt && rt->dst.dev)
+ ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
+ ip6_rt_put(rt);
+ break;
+ }
+#endif
+ }
+
+ if (ldev_mtu <= 0)
+ return;
+
+ geneve_change_mtu(dev, ldev_mtu - info->options_len);
+}
+
static int geneve_newlink(struct net *net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1402,8 +1444,14 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
if (err)
return err;
- return geneve_configure(net, dev, extack, &info, metadata,
- use_udp6_rx_checksums);
+ err = geneve_configure(net, dev, extack, &info, metadata,
+ use_udp6_rx_checksums);
+ if (err)
+ return err;
+
+ geneve_link_config(dev, &info, tb);
+
+ return 0;
}
/* Quiesces the geneve device data path for both TX and RX.
@@ -1477,8 +1525,10 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
if (err)
return err;
- if (!geneve_dst_addr_equal(&geneve->info, &info))
+ if (!geneve_dst_addr_equal(&geneve->info, &info)) {
dst_cache_reset(&info.dst_cache);
+ geneve_link_config(dev, &info, tb);
+ }
geneve_quiesce(geneve, &gs4, &gs6);
geneve->info = info;
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index c180b480f8ef..13e4c1eff353 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -217,7 +217,7 @@ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc,
c = *s++;
else if (len > 1)
c = crc >> 8;
- else if (len > 0)
+ else
c = crc & 0xff;
len--;
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig
index 936968d23559..0765d5f61714 100644
--- a/drivers/net/hyperv/Kconfig
+++ b/drivers/net/hyperv/Kconfig
@@ -1,5 +1,6 @@
config HYPERV_NET
tristate "Microsoft Hyper-V virtual network driver"
depends on HYPERV
+ select UCS2_STRING
help
Select this option to enable the Hyper-V virtual network driver.
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 960f06141472..6ebe39a3dde6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -237,6 +237,8 @@ void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
#define NVSP_PROTOCOL_VERSION_2 0x30002
#define NVSP_PROTOCOL_VERSION_4 0x40000
#define NVSP_PROTOCOL_VERSION_5 0x50000
+#define NVSP_PROTOCOL_VERSION_6 0x60000
+#define NVSP_PROTOCOL_VERSION_61 0x60001
enum {
NVSP_MSG_TYPE_NONE = 0,
@@ -308,6 +310,12 @@ enum {
NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
+
+ /* Version 6 messages */
+ NVSP_MSG6_TYPE_PD_API,
+ NVSP_MSG6_TYPE_PD_POST_BATCH,
+
+ NVSP_MSG6_MAX = NVSP_MSG6_TYPE_PD_POST_BATCH
};
enum {
@@ -619,12 +627,168 @@ union nvsp_5_message_uber {
struct nvsp_5_send_indirect_table send_table;
} __packed;
+enum nvsp_6_pd_api_op {
+ PD_API_OP_CONFIG = 1,
+ PD_API_OP_SW_DATAPATH, /* Switch Datapath */
+ PD_API_OP_OPEN_PROVIDER,
+ PD_API_OP_CLOSE_PROVIDER,
+ PD_API_OP_CREATE_QUEUE,
+ PD_API_OP_FLUSH_QUEUE,
+ PD_API_OP_FREE_QUEUE,
+ PD_API_OP_ALLOC_COM_BUF, /* Allocate Common Buffer */
+ PD_API_OP_FREE_COM_BUF, /* Free Common Buffer */
+ PD_API_OP_MAX
+};
+
+struct grp_affinity {
+ u64 mask;
+ u16 grp;
+ u16 reserved[3];
+} __packed;
+
+struct nvsp_6_pd_api_req {
+ u32 op;
+
+ union {
+ /* MMIO information is sent from the VM to VSP */
+ struct __packed {
+ u64 mmio_pa; /* MMIO Physical Address */
+ u32 mmio_len;
+
+ /* Number of PD queues a VM can support */
+ u16 num_subchn;
+ } config;
+
+ /* Switch Datapath */
+ struct __packed {
+ /* Host Datapath Is PacketDirect */
+ u8 host_dpath_is_pd;
+
+ /* Guest PacketDirect Is Enabled */
+ u8 guest_pd_enabled;
+ } sw_dpath;
+
+ /* Open Provider*/
+ struct __packed {
+ u32 prov_id; /* Provider id */
+ u32 flag;
+ } open_prov;
+
+ /* Close Provider */
+ struct __packed {
+ u32 prov_id;
+ } cls_prov;
+
+ /* Create Queue*/
+ struct __packed {
+ u32 prov_id;
+ u16 q_id;
+ u16 q_size;
+ u8 is_recv_q;
+ u8 is_rss_q;
+ u32 recv_data_len;
+ struct grp_affinity affy;
+ } cr_q;
+
+ /* Delete Queue*/
+ struct __packed {
+ u32 prov_id;
+ u16 q_id;
+ } del_q;
+
+ /* Flush Queue */
+ struct __packed {
+ u32 prov_id;
+ u16 q_id;
+ } flush_q;
+
+ /* Allocate Common Buffer */
+ struct __packed {
+ u32 len;
+ u32 pf_node; /* Preferred Node */
+ u16 region_id;
+ } alloc_com_buf;
+
+ /* Free Common Buffer */
+ struct __packed {
+ u32 len;
+ u64 pa; /* Physical Address */
+ u32 pf_node; /* Preferred Node */
+ u16 region_id;
+ u8 cache_type;
+ } free_com_buf;
+ } __packed;
+} __packed;
+
+struct nvsp_6_pd_api_comp {
+ u32 op;
+ u32 status;
+
+ union {
+ struct __packed {
+ /* actual number of PD queues allocated to the VM */
+ u16 num_pd_q;
+
+ /* Num Receive Rss PD Queues */
+ u8 num_rss_q;
+
+ u8 is_supported; /* Is supported by VSP */
+ u8 is_enabled; /* Is enabled by VSP */
+ } config;
+
+ /* Open Provider */
+ struct __packed {
+ u32 prov_id;
+ } open_prov;
+
+ /* Create Queue */
+ struct __packed {
+ u32 prov_id;
+ u16 q_id;
+ u16 q_size;
+ u32 recv_data_len;
+ struct grp_affinity affy;
+ } cr_q;
+
+ /* Allocate Common Buffer */
+ struct __packed {
+ u64 pa; /* Physical Address */
+ u32 len;
+ u32 pf_node; /* Preferred Node */
+ u16 region_id;
+ u8 cache_type;
+ } alloc_com_buf;
+ } __packed;
+} __packed;
+
+struct nvsp_6_pd_buf {
+ u32 region_offset;
+ u16 region_id;
+ u16 is_partial:1;
+ u16 reserved:15;
+} __packed;
+
+struct nvsp_6_pd_batch_msg {
+ struct nvsp_message_header hdr;
+ u16 count;
+ u16 guest2host:1;
+ u16 is_recv:1;
+ u16 reserved:14;
+ struct nvsp_6_pd_buf pd_buf[0];
+} __packed;
+
+union nvsp_6_message_uber {
+ struct nvsp_6_pd_api_req pd_req;
+ struct nvsp_6_pd_api_comp pd_comp;
+} __packed;
+
union nvsp_all_messages {
union nvsp_message_init_uber init_msg;
union nvsp_1_message_uber v1_msg;
union nvsp_2_message_uber v2_msg;
union nvsp_4_message_uber v4_msg;
union nvsp_5_message_uber v5_msg;
+ union nvsp_6_message_uber v6_msg;
} __packed;
/* ALL Messages */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 04f611e6f678..d2ee66c259a7 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -525,7 +525,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
struct net_device *ndev = hv_get_drvdata(device);
static const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
- NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
+ NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
+ NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
};
struct nvsp_message *init_packet;
int ndis_version, i, ret;
@@ -651,16 +652,14 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
sync_change_bit(index, net_device->send_section_map);
}
-static void netvsc_send_tx_complete(struct netvsc_device *net_device,
- struct vmbus_channel *incoming_channel,
- struct hv_device *device,
+static void netvsc_send_tx_complete(struct net_device *ndev,
+ struct netvsc_device *net_device,
+ struct vmbus_channel *channel,
const struct vmpacket_descriptor *desc,
int budget)
{
struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
- struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *ndev_ctx = netdev_priv(ndev);
- struct vmbus_channel *channel = device->channel;
u16 q_idx = 0;
int queue_sends;
@@ -674,7 +673,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
if (send_index != NETVSC_INVALID_INDEX)
netvsc_free_send_slot(net_device, send_index);
q_idx = packet->q_idx;
- channel = incoming_channel;
tx_stats = &net_device->chan_table[q_idx].tx_stats;
@@ -704,14 +702,13 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
}
}
-static void netvsc_send_completion(struct netvsc_device *net_device,
+static void netvsc_send_completion(struct net_device *ndev,
+ struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
- struct hv_device *device,
const struct vmpacket_descriptor *desc,
int budget)
{
- struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
- struct net_device *ndev = hv_get_drvdata(device);
+ const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
@@ -725,8 +722,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
break;
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
- netvsc_send_tx_complete(net_device, incoming_channel,
- device, desc, budget);
+ netvsc_send_tx_complete(ndev, net_device, incoming_channel,
+ desc, budget);
break;
default:
@@ -1091,12 +1088,11 @@ static void enq_receive_complete(struct net_device *ndev,
static int netvsc_receive(struct net_device *ndev,
struct netvsc_device *net_device,
- struct net_device_context *net_device_ctx,
- struct hv_device *device,
struct vmbus_channel *channel,
const struct vmpacket_descriptor *desc,
- struct nvsp_message *nvsp)
+ const struct nvsp_message *nvsp)
{
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d);
u16 q_idx = channel->offermsg.offer.sub_channel_index;
@@ -1157,13 +1153,12 @@ static int netvsc_receive(struct net_device *ndev,
return count;
}
-static void netvsc_send_table(struct hv_device *hdev,
- struct nvsp_message *nvmsg)
+static void netvsc_send_table(struct net_device *ndev,
+ const struct nvsp_message *nvmsg)
{
- struct net_device *ndev = hv_get_drvdata(hdev);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- int i;
u32 count, *tab;
+ int i;
count = nvmsg->msg.v5_msg.send_table.count;
if (count != VRSS_SEND_TAB_SIZE) {
@@ -1178,24 +1173,25 @@ static void netvsc_send_table(struct hv_device *hdev,
net_device_ctx->tx_table[i] = tab[i];
}
-static void netvsc_send_vf(struct net_device_context *net_device_ctx,
- struct nvsp_message *nvmsg)
+static void netvsc_send_vf(struct net_device *ndev,
+ const struct nvsp_message *nvmsg)
{
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
+
net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
}
-static inline void netvsc_receive_inband(struct hv_device *hdev,
- struct net_device_context *net_device_ctx,
- struct nvsp_message *nvmsg)
+static void netvsc_receive_inband(struct net_device *ndev,
+ const struct nvsp_message *nvmsg)
{
switch (nvmsg->hdr.msg_type) {
case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
- netvsc_send_table(hdev, nvmsg);
+ netvsc_send_table(ndev, nvmsg);
break;
case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
- netvsc_send_vf(net_device_ctx, nvmsg);
+ netvsc_send_vf(ndev, nvmsg);
break;
}
}
@@ -1207,24 +1203,23 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
const struct vmpacket_descriptor *desc,
int budget)
{
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct nvsp_message *nvmsg = hv_pkt_data(desc);
+ const struct nvsp_message *nvmsg = hv_pkt_data(desc);
trace_nvsp_recv(ndev, channel, nvmsg);
switch (desc->type) {
case VM_PKT_COMP:
- netvsc_send_completion(net_device, channel, device,
+ netvsc_send_completion(ndev, net_device, channel,
desc, budget);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- return netvsc_receive(ndev, net_device, net_device_ctx,
- device, channel, desc, nvmsg);
+ return netvsc_receive(ndev, net_device, channel,
+ desc, nvmsg);
break;
case VM_PKT_DATA_INBAND:
- netvsc_receive_inband(device, net_device_ctx, nvmsg);
+ netvsc_receive_inband(ndev, nvmsg);
break;
default:
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 6b127be781d9..3b6dbacaf77d 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -29,6 +29,7 @@
#include <linux/nls.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
+#include <linux/ucs2_string.h>
#include "hyperv_net.h"
#include "netvsc_trace.h"
@@ -1223,6 +1224,29 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
return ret;
}
+static void rndis_get_friendly_name(struct net_device *net,
+ struct rndis_device *rndis_device,
+ struct netvsc_device *net_device)
+{
+ ucs2_char_t wname[256];
+ unsigned long len;
+ u8 ifalias[256];
+ u32 size;
+
+ size = sizeof(wname);
+ if (rndis_filter_query_device(rndis_device, net_device,
+ RNDIS_OID_GEN_FRIENDLY_NAME,
+ wname, &size) != 0)
+ return;
+
+ /* Convert Windows Unicode string to UTF-8 */
+ len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias));
+
+ /* ignore the default value from host */
+ if (strcmp(ifalias, "Network Adapter") != 0)
+ dev_set_alias(net, ifalias, len);
+}
+
struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
struct netvsc_device_info *device_info)
{
@@ -1276,6 +1300,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
+ /* Get friendly name as ifalias*/
+ if (!net->ifalias)
+ rndis_get_friendly_name(net, rndis_device, net_device);
+
/* Query and set hardware capabilities */
ret = rndis_netdev_set_hwcaps(rndis_device, net_device);
if (ret != 0)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 725f4b4afc6d..adde8fc45588 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -514,6 +514,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
const struct macvlan_dev *vlan = netdev_priv(dev);
const struct macvlan_port *port = vlan->port;
const struct macvlan_dev *dest;
+ void *accel_priv = NULL;
if (vlan->mode == MACVLAN_MODE_BRIDGE) {
const struct ethhdr *eth = (void *)skb->data;
@@ -533,9 +534,14 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
+ /* For packets that are non-multicast and not bridged we will pass
+ * the necessary information so that the lowerdev can distinguish
+ * the source of the packets via the accel_priv value.
+ */
+ accel_priv = vlan->accel_priv;
xmit_world:
skb->dev = vlan->lowerdev;
- return dev_queue_xmit(skb);
+ return dev_queue_xmit_accel(skb, accel_priv);
}
static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
@@ -552,19 +558,14 @@ static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, str
static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ struct macvlan_dev *vlan = netdev_priv(dev);
unsigned int len = skb->len;
int ret;
- struct macvlan_dev *vlan = netdev_priv(dev);
if (unlikely(netpoll_tx_running(dev)))
return macvlan_netpoll_send_skb(vlan, skb);
- if (vlan->fwd_priv) {
- skb->dev = vlan->lowerdev;
- ret = dev_queue_xmit_accel(skb, vlan->fwd_priv);
- } else {
- ret = macvlan_queue_xmit(skb, dev);
- }
+ ret = macvlan_queue_xmit(skb, dev);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
struct vlan_pcpu_stats *pcpu_stats;
@@ -613,26 +614,27 @@ static int macvlan_open(struct net_device *dev)
goto hash_add;
}
- if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
- vlan->fwd_priv =
- lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
-
- /* If we get a NULL pointer back, or if we get an error
- * then we should just fall through to the non accelerated path
- */
- if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
- vlan->fwd_priv = NULL;
- } else
- return 0;
- }
-
err = -EBUSY;
if (macvlan_addr_busy(vlan->port, dev->dev_addr))
goto out;
- err = dev_uc_add(lowerdev, dev->dev_addr);
- if (err < 0)
- goto out;
+ /* Attempt to populate accel_priv which is used to offload the L2
+ * forwarding requests for unicast packets.
+ */
+ if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD)
+ vlan->accel_priv =
+ lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
+
+ /* If earlier attempt to offload failed, or accel_priv is not
+ * populated we must add the unicast address to the lower device.
+ */
+ if (IS_ERR_OR_NULL(vlan->accel_priv)) {
+ vlan->accel_priv = NULL;
+ err = dev_uc_add(lowerdev, dev->dev_addr);
+ if (err < 0)
+ goto out;
+ }
+
if (dev->flags & IFF_ALLMULTI) {
err = dev_set_allmulti(lowerdev, 1);
if (err < 0)
@@ -653,13 +655,14 @@ clear_multi:
if (dev->flags & IFF_ALLMULTI)
dev_set_allmulti(lowerdev, -1);
del_unicast:
- dev_uc_del(lowerdev, dev->dev_addr);
-out:
- if (vlan->fwd_priv) {
+ if (vlan->accel_priv) {
lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
- vlan->fwd_priv);
- vlan->fwd_priv = NULL;
+ vlan->accel_priv);
+ vlan->accel_priv = NULL;
+ } else {
+ dev_uc_del(lowerdev, dev->dev_addr);
}
+out:
return err;
}
@@ -668,11 +671,10 @@ static int macvlan_stop(struct net_device *dev)
struct macvlan_dev *vlan = netdev_priv(dev);
struct net_device *lowerdev = vlan->lowerdev;
- if (vlan->fwd_priv) {
+ if (vlan->accel_priv) {
lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
- vlan->fwd_priv);
- vlan->fwd_priv = NULL;
- return 0;
+ vlan->accel_priv);
+ vlan->accel_priv = NULL;
}
dev_uc_unsync(lowerdev, dev);
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index bdfbabb86ee0..edb8b9ab827f 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -218,6 +218,12 @@ config AQUANTIA_PHY
---help---
Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
+config ASIX_PHY
+ tristate "Asix PHYs"
+ help
+ Currently supports the Asix Electronics PHY found in the X-Surf 100
+ AX88796B package.
+
config AT803X_PHY
tristate "AT803X PHYs"
---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 01acbcb2c798..701ca0b8717e 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -45,6 +45,7 @@ obj-y += $(sfp-obj-y) $(sfp-obj-m)
obj-$(CONFIG_AMD_PHY) += amd.o
obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o
+obj-$(CONFIG_ASIX_PHY) += asix.o
obj-$(CONFIG_AT803X_PHY) += at803x.o
obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o
obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o
diff --git a/drivers/net/phy/asix.c b/drivers/net/phy/asix.c
new file mode 100644
index 000000000000..8ebe7f5484ae
--- /dev/null
+++ b/drivers/net/phy/asix.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Driver for Asix PHYs
+ *
+ * Author: Michael Schmitz <schmitzmic@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+#define PHY_ID_ASIX_AX88796B 0x003b1841
+
+MODULE_DESCRIPTION("Asix PHY driver");
+MODULE_AUTHOR("Michael Schmitz <schmitzmic@gmail.com>");
+MODULE_LICENSE("GPL");
+
+/**
+ * asix_soft_reset - software reset the PHY via BMCR_RESET bit
+ * @phydev: target phy_device struct
+ *
+ * Description: Perform a software PHY reset using the standard
+ * BMCR_RESET bit and poll for the reset bit to be cleared.
+ * Toggle BMCR_RESET bit off to accommodate broken AX8796B PHY implementation
+ * such as used on the Individual Computers' X-Surf 100 Zorro card.
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+static int asix_soft_reset(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Asix PHY won't reset unless reset bit toggles */
+ ret = phy_write(phydev, MII_BMCR, 0);
+ if (ret < 0)
+ return ret;
+
+ return genphy_soft_reset(phydev);
+}
+
+static struct phy_driver asix_driver[] = { {
+ .phy_id = PHY_ID_ASIX_AX88796B,
+ .name = "Asix Electronics AX88796B",
+ .phy_id_mask = 0xfffffff0,
+ .features = PHY_BASIC_FEATURES,
+ .soft_reset = asix_soft_reset,
+} };
+
+module_phy_driver(asix_driver);
+
+static struct mdio_device_id __maybe_unused asix_tbl[] = {
+ { PHY_ID_ASIX_AX88796B, 0xfffffff0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, asix_tbl);
diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 5ad130c3da43..0876aec7328c 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -346,10 +346,6 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data)
}
EXPORT_SYMBOL_GPL(bcm_phy_get_strings);
-#ifndef UINT64_MAX
-#define UINT64_MAX (u64)(~((u64)0))
-#endif
-
/* Caller is supposed to provide appropriate storage for the library code to
* access the shadow copy
*/
@@ -362,7 +358,7 @@ static u64 bcm_phy_get_stat(struct phy_device *phydev, u64 *shadow,
val = phy_read(phydev, stat.reg);
if (val < 0) {
- ret = UINT64_MAX;
+ ret = U64_MAX;
} else {
val >>= stat.shift;
val = val & ((1 << stat.bits) - 1);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 25e2a099b71c..b8f57e9b9379 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1482,9 +1482,6 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data)
}
}
-#ifndef UINT64_MAX
-#define UINT64_MAX (u64)(~((u64)0))
-#endif
static u64 marvell_get_stat(struct phy_device *phydev, int i)
{
struct marvell_hw_stat stat = marvell_hw_stats[i];
@@ -1494,7 +1491,7 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i)
val = phy_read_paged(phydev, stat.page, stat.reg);
if (val < 0) {
- ret = UINT64_MAX;
+ ret = U64_MAX;
} else {
val = val & ((1 << stat.bits) - 1);
priv->stats[i] += val;
diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
index 403b085f0a89..15352f987bdf 100644
--- a/drivers/net/phy/mdio-bitbang.c
+++ b/drivers/net/phy/mdio-bitbang.c
@@ -205,14 +205,6 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
return 0;
}
-static int mdiobb_reset(struct mii_bus *bus)
-{
- struct mdiobb_ctrl *ctrl = bus->priv;
- if (ctrl->reset)
- ctrl->reset(bus);
- return 0;
-}
-
struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
{
struct mii_bus *bus;
@@ -225,7 +217,6 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
bus->read = mdiobb_read;
bus->write = mdiobb_write;
- bus->reset = mdiobb_reset;
bus->priv = ctrl;
return bus;
diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c
index 1861f387820d..863496fa5d13 100644
--- a/drivers/net/phy/mdio-boardinfo.c
+++ b/drivers/net/phy/mdio-boardinfo.c
@@ -30,17 +30,20 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus,
struct mdio_board_info *bi))
{
struct mdio_board_entry *be;
+ struct mdio_board_entry *tmp;
struct mdio_board_info *bi;
int ret;
mutex_lock(&mdio_board_lock);
- list_for_each_entry(be, &mdio_board_list, list) {
+ list_for_each_entry_safe(be, tmp, &mdio_board_list, list) {
bi = &be->board_info;
if (strcmp(bus->id, bi->bus_id))
continue;
+ mutex_unlock(&mdio_board_lock);
ret = cb(bus, bi);
+ mutex_lock(&mdio_board_lock);
if (ret)
continue;
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 4333c6e14742..b501221819e1 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -24,8 +24,10 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
+#include <linux/mdio-bitbang.h>
+#include <linux/mdio-gpio.h>
#include <linux/gpio.h>
-#include <linux/platform_data/mdio-gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/of_gpio.h>
#include <linux/of_mdio.h>
@@ -35,37 +37,22 @@ struct mdio_gpio_info {
struct gpio_desc *mdc, *mdio, *mdo;
};
-static void *mdio_gpio_of_get_data(struct platform_device *pdev)
+static int mdio_gpio_get_data(struct device *dev,
+ struct mdio_gpio_info *bitbang)
{
- struct device_node *np = pdev->dev.of_node;
- struct mdio_gpio_platform_data *pdata;
- enum of_gpio_flags flags;
- int ret;
-
- pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
- if (!pdata)
- return NULL;
-
- ret = of_get_gpio_flags(np, 0, &flags);
- if (ret < 0)
- return NULL;
-
- pdata->mdc = ret;
- pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW;
-
- ret = of_get_gpio_flags(np, 1, &flags);
- if (ret < 0)
- return NULL;
- pdata->mdio = ret;
- pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW;
-
- ret = of_get_gpio_flags(np, 2, &flags);
- if (ret > 0) {
- pdata->mdo = ret;
- pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW;
- }
-
- return pdata;
+ bitbang->mdc = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDC,
+ GPIOD_OUT_LOW);
+ if (IS_ERR(bitbang->mdc))
+ return PTR_ERR(bitbang->mdc);
+
+ bitbang->mdio = devm_gpiod_get_index(dev, NULL, MDIO_GPIO_MDIO,
+ GPIOD_IN);
+ if (IS_ERR(bitbang->mdio))
+ return PTR_ERR(bitbang->mdio);
+
+ bitbang->mdo = devm_gpiod_get_index_optional(dev, NULL, MDIO_GPIO_MDO,
+ GPIOD_OUT_LOW);
+ return PTR_ERR_OR_ZERO(bitbang->mdo);
}
static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
@@ -125,78 +112,28 @@ static const struct mdiobb_ops mdio_gpio_ops = {
};
static struct mii_bus *mdio_gpio_bus_init(struct device *dev,
- struct mdio_gpio_platform_data *pdata,
+ struct mdio_gpio_info *bitbang,
int bus_id)
{
struct mii_bus *new_bus;
- struct mdio_gpio_info *bitbang;
- int i;
- int mdc, mdio, mdo;
- unsigned long mdc_flags = GPIOF_OUT_INIT_LOW;
- unsigned long mdio_flags = GPIOF_DIR_IN;
- unsigned long mdo_flags = GPIOF_OUT_INIT_HIGH;
-
- bitbang = devm_kzalloc(dev, sizeof(*bitbang), GFP_KERNEL);
- if (!bitbang)
- goto out;
bitbang->ctrl.ops = &mdio_gpio_ops;
- bitbang->ctrl.reset = pdata->reset;
- mdc = pdata->mdc;
- bitbang->mdc = gpio_to_desc(mdc);
- if (pdata->mdc_active_low)
- mdc_flags = GPIOF_OUT_INIT_HIGH | GPIOF_ACTIVE_LOW;
- mdio = pdata->mdio;
- bitbang->mdio = gpio_to_desc(mdio);
- if (pdata->mdio_active_low)
- mdio_flags |= GPIOF_ACTIVE_LOW;
- mdo = pdata->mdo;
- if (mdo) {
- bitbang->mdo = gpio_to_desc(mdo);
- if (pdata->mdo_active_low)
- mdo_flags = GPIOF_OUT_INIT_LOW | GPIOF_ACTIVE_LOW;
- }
new_bus = alloc_mdio_bitbang(&bitbang->ctrl);
if (!new_bus)
- goto out;
-
- new_bus->name = "GPIO Bitbanged MDIO",
+ return NULL;
- new_bus->phy_mask = pdata->phy_mask;
- new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask;
- memcpy(new_bus->irq, pdata->irqs, sizeof(new_bus->irq));
+ new_bus->name = "GPIO Bitbanged MDIO";
new_bus->parent = dev;
- if (new_bus->phy_mask == ~0)
- goto out_free_bus;
-
- for (i = 0; i < PHY_MAX_ADDR; i++)
- if (!new_bus->irq[i])
- new_bus->irq[i] = PHY_POLL;
-
if (bus_id != -1)
snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id);
else
strncpy(new_bus->id, "gpio", MII_BUS_ID_SIZE);
- if (devm_gpio_request_one(dev, mdc, mdc_flags, "mdc"))
- goto out_free_bus;
-
- if (devm_gpio_request_one(dev, mdio, mdio_flags, "mdio"))
- goto out_free_bus;
-
- if (mdo && devm_gpio_request_one(dev, mdo, mdo_flags, "mdo"))
- goto out_free_bus;
-
dev_set_drvdata(dev, new_bus);
return new_bus;
-
-out_free_bus:
- free_mdio_bitbang(new_bus);
-out:
- return NULL;
}
static void mdio_gpio_bus_deinit(struct device *dev)
@@ -216,26 +153,29 @@ static void mdio_gpio_bus_destroy(struct device *dev)
static int mdio_gpio_probe(struct platform_device *pdev)
{
- struct mdio_gpio_platform_data *pdata;
+ struct mdio_gpio_info *bitbang;
struct mii_bus *new_bus;
int ret, bus_id;
+ bitbang = devm_kzalloc(&pdev->dev, sizeof(*bitbang), GFP_KERNEL);
+ if (!bitbang)
+ return -ENOMEM;
+
+ ret = mdio_gpio_get_data(&pdev->dev, bitbang);
+ if (ret)
+ return ret;
+
if (pdev->dev.of_node) {
- pdata = mdio_gpio_of_get_data(pdev);
bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio");
if (bus_id < 0) {
dev_warn(&pdev->dev, "failed to get alias id\n");
bus_id = 0;
}
} else {
- pdata = dev_get_platdata(&pdev->dev);
bus_id = pdev->id;
}
- if (!pdata)
- return -ENODEV;
-
- new_bus = mdio_gpio_bus_init(&pdev->dev, pdata, bus_id);
+ new_bus = mdio_gpio_bus_init(&pdev->dev, bitbang, bus_id);
if (!new_bus)
return -ENODEV;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index f41b224a9cdb..de31c5170a5b 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -650,9 +650,6 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data)
}
}
-#ifndef UINT64_MAX
-#define UINT64_MAX (u64)(~((u64)0))
-#endif
static u64 kszphy_get_stat(struct phy_device *phydev, int i)
{
struct kszphy_hw_stat stat = kszphy_hw_stats[i];
@@ -662,7 +659,7 @@ static u64 kszphy_get_stat(struct phy_device *phydev, int i)
val = phy_read(phydev, stat.reg);
if (val < 0) {
- ret = UINT64_MAX;
+ ret = U64_MAX;
} else {
val = val & ((1 << stat.bits) - 1);
priv->stats[i] += val;
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index a97ac8c12c4c..2d67937866a3 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -21,6 +21,8 @@
#include <linux/phy.h>
#include <linux/microchipphy.h>
#include <linux/delay.h>
+#include <linux/of.h>
+#include <dt-bindings/net/microchip-lan78xx.h>
#define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>"
#define DRIVER_DESC "Microchip LAN88XX PHY driver"
@@ -225,6 +227,8 @@ static int lan88xx_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
struct lan88xx_priv *priv;
+ u32 led_modes[4];
+ int len;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -232,6 +236,27 @@ static int lan88xx_probe(struct phy_device *phydev)
priv->wolopts = 0;
+ len = of_property_read_variable_u32_array(dev->of_node,
+ "microchip,led-modes",
+ led_modes,
+ 0,
+ ARRAY_SIZE(led_modes));
+ if (len >= 0) {
+ u32 reg = 0;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (led_modes[i] > 15)
+ return -EINVAL;
+ reg |= led_modes[i] << (i * 4);
+ }
+ for (; i < ARRAY_SIZE(led_modes); i++)
+ reg |= LAN78XX_FORCE_LED_OFF << (i * 4);
+ (void)phy_write(phydev, LAN78XX_PHY_LED_MODE_SELECT, reg);
+ } else if (len == -EOVERFLOW) {
+ return -EINVAL;
+ }
+
/* these values can be used to identify internal PHY */
priv->chip_id = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_ID);
priv->chip_rev = phy_read_mmd(phydev, 3, LAN88XX_MMD3_CHIP_REV);
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index be399d645224..c328208388da 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -168,9 +168,6 @@ static void smsc_get_strings(struct phy_device *phydev, u8 *data)
}
}
-#ifndef UINT64_MAX
-#define UINT64_MAX (u64)(~((u64)0))
-#endif
static u64 smsc_get_stat(struct phy_device *phydev, int i)
{
struct smsc_hw_stat stat = smsc_hw_stats[i];
@@ -179,7 +176,7 @@ static u64 smsc_get_stat(struct phy_device *phydev, int i)
val = phy_read(phydev, stat.reg);
if (val < 0)
- ret = UINT64_MAX;
+ ret = U64_MAX;
else
ret = val;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ddb6bf85a59c..9dbd390ace34 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2942,7 +2942,7 @@ static int team_device_event(struct notifier_block *unused,
case NETDEV_CHANGE:
if (netif_running(port->dev))
team_port_change_check(port,
- !!netif_carrier_ok(port->dev));
+ !!netif_oper_up(port->dev));
break;
case NETDEV_UNREGISTER:
team_del_slave(port->team->dev, dev);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ef33950a45d9..d3c04ab9752a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -248,11 +248,11 @@ struct veth {
__be16 h_vlan_TCI;
};
-bool tun_is_xdp_buff(void *ptr)
+bool tun_is_xdp_frame(void *ptr)
{
return (unsigned long)ptr & TUN_XDP_FLAG;
}
-EXPORT_SYMBOL(tun_is_xdp_buff);
+EXPORT_SYMBOL(tun_is_xdp_frame);
void *tun_xdp_to_ptr(void *ptr)
{
@@ -525,11 +525,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
rcu_read_lock();
- /* We may get a very small possibility of OOO during switching, not
- * worth to optimize.*/
- if (tun->numqueues == 1 || tfile->detached)
- goto unlock;
-
e = tun_flow_find(head, rxhash);
if (likely(e)) {
/* TODO: keep queueing to old queue until it's empty? */
@@ -548,7 +543,6 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
spin_unlock_bh(&tun->lock);
}
-unlock:
rcu_read_unlock();
}
@@ -660,10 +654,10 @@ void tun_ptr_free(void *ptr)
{
if (!ptr)
return;
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- put_page(virt_to_head_page(xdp->data));
+ xdp_return_frame(xdpf);
} else {
__skb_array_destroy_skb(ptr);
}
@@ -854,6 +848,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
tun->dev, tfile->queue_index);
if (err < 0)
goto out;
+ err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0) {
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ goto out;
+ }
err = 0;
}
@@ -1290,21 +1290,13 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
{
struct tun_struct *tun = netdev_priv(dev);
- struct xdp_buff *buff = xdp->data_hard_start;
- int headroom = xdp->data - xdp->data_hard_start;
struct tun_file *tfile;
u32 numqueues;
int ret = 0;
- /* Assure headroom is available and buff is properly aligned */
- if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
- return -ENOSPC;
-
- *buff = *xdp;
-
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
@@ -1318,7 +1310,7 @@ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff.
*/
- if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
+ if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
ret = -ENOSPC;
}
@@ -1328,6 +1320,16 @@ out:
return ret;
}
+static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+{
+ struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+
+ if (unlikely(!frame))
+ return -EOVERFLOW;
+
+ return tun_xdp_xmit(dev, frame);
+}
+
static void tun_xdp_flush(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -1675,7 +1677,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
case XDP_TX:
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
- if (tun_xdp_xmit(tun->dev, &xdp))
+ if (tun_xdp_tx(tun->dev, &xdp))
goto err_redirect;
tun_xdp_flush(tun->dev);
rcu_read_unlock();
@@ -1683,6 +1685,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
+ len = xdp.data_end - xdp.data;
break;
default:
bpf_warn_invalid_xdp_action(act);
@@ -1703,7 +1706,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
}
skb_reserve(skb, pad - delta);
- skb_put(skb, len + delta);
+ skb_put(skb, len);
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
@@ -1924,10 +1927,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
rcu_read_unlock();
}
- rcu_read_lock();
- if (!rcu_dereference(tun->steering_prog))
+ /* Compute the costly rx hash only if needed for flow updates.
+ * We may get a very small possibility of OOO during switching, not
+ * worth to optimize.
+ */
+ if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
+ !tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
- rcu_read_unlock();
if (frags) {
/* Exercise flow dissector code path. */
@@ -1996,11 +2002,11 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
struct tun_file *tfile,
- struct xdp_buff *xdp,
+ struct xdp_frame *xdp_frame,
struct iov_iter *iter)
{
int vnet_hdr_sz = 0;
- size_t size = xdp->data_end - xdp->data;
+ size_t size = xdp_frame->len;
struct tun_pcpu_stats *stats;
size_t ret;
@@ -2016,7 +2022,7 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
}
- ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+ ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
@@ -2184,11 +2190,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
return err;
}
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- ret = tun_put_user_xdp(tun, tfile, xdp, to);
- put_page(virt_to_head_page(xdp->data));
+ ret = tun_put_user_xdp(tun, tfile, xdpf, to);
+ xdp_return_frame(xdpf);
} else {
struct sk_buff *skb = ptr;
@@ -2427,10 +2433,10 @@ out_free:
static int tun_ptr_peek_len(void *ptr)
{
if (likely(ptr)) {
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- return xdp->data_end - xdp->data;
+ return xdpf->len;
}
return __skb_array_len_with_tag(ptr);
} else {
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index f28bd74ac275..418b0904cecb 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -111,6 +111,7 @@ config USB_LAN78XX
select MII
select PHYLIB
select MICROCHIP_PHY
+ select FIXED_PHY
help
This option adds support for Microchip LAN78XX based USB 2
& USB 3 10/100/1000 Ethernet adapters.
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 0867f7275852..91761436709a 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -36,13 +36,14 @@
#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/microchipphy.h>
-#include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include "lan78xx.h"
#define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>"
#define DRIVER_DESC "LAN78XX USB 3.0 Gigabit Ethernet Devices"
#define DRIVER_NAME "lan78xx"
-#define DRIVER_VERSION "1.0.6"
#define TX_TIMEOUT_JIFFIES (5 * HZ)
#define THROTTLE_JIFFIES (HZ / 8)
@@ -278,6 +279,30 @@ struct lan78xx_statstage64 {
u64 eee_tx_lpi_time;
};
+static u32 lan78xx_regs[] = {
+ ID_REV,
+ INT_STS,
+ HW_CFG,
+ PMT_CTL,
+ E2P_CMD,
+ E2P_DATA,
+ USB_STATUS,
+ VLAN_TYPE,
+ MAC_CR,
+ MAC_RX,
+ MAC_TX,
+ FLOW,
+ ERR_STS,
+ MII_ACC,
+ MII_DATA,
+ EEE_TX_LPI_REQ_DLY,
+ EEE_TW_TX_SYS,
+ EEE_TX_LPI_REM_DLY,
+ WUCSR
+};
+
+#define PHY_REG_SIZE (32 * sizeof(u32))
+
struct lan78xx_net;
struct lan78xx_priv {
@@ -1477,7 +1502,6 @@ static void lan78xx_get_drvinfo(struct net_device *net,
struct lan78xx_net *dev = netdev_priv(net);
strncpy(info->driver, DRIVER_NAME, sizeof(info->driver));
- strncpy(info->version, DRIVER_VERSION, sizeof(info->version));
usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
}
@@ -1605,6 +1629,34 @@ exit:
return ret;
}
+static int lan78xx_get_regs_len(struct net_device *netdev)
+{
+ if (!netdev->phydev)
+ return (sizeof(lan78xx_regs));
+ else
+ return (sizeof(lan78xx_regs) + PHY_REG_SIZE);
+}
+
+static void
+lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+ void *buf)
+{
+ u32 *data = buf;
+ int i, j;
+ struct lan78xx_net *dev = netdev_priv(netdev);
+
+ /* Read Device/MAC registers */
+ for (i = 0; i < (sizeof(lan78xx_regs) / sizeof(u32)); i++)
+ lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]);
+
+ if (!netdev->phydev)
+ return;
+
+ /* Read PHY registers */
+ for (j = 0; j < 32; i++, j++)
+ data[i] = phy_read(netdev->phydev, j);
+}
+
static const struct ethtool_ops lan78xx_ethtool_ops = {
.get_link = lan78xx_get_link,
.nway_reset = phy_ethtool_nway_reset,
@@ -1625,6 +1677,8 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
.set_pauseparam = lan78xx_set_pause,
.get_link_ksettings = lan78xx_get_link_ksettings,
.set_link_ksettings = lan78xx_set_link_ksettings,
+ .get_regs_len = lan78xx_get_regs_len,
+ .get_regs = lan78xx_get_regs,
};
static int lan78xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -1652,34 +1706,31 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
addr[5] = (addr_hi >> 8) & 0xFF;
if (!is_valid_ether_addr(addr)) {
- /* reading mac address from EEPROM or OTP */
- if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN,
- addr) == 0) ||
- (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN,
- addr) == 0)) {
- if (is_valid_ether_addr(addr)) {
- /* eeprom values are valid so use them */
- netif_dbg(dev, ifup, dev->net,
- "MAC address read from EEPROM");
- } else {
- /* generate random MAC */
- random_ether_addr(addr);
- netif_dbg(dev, ifup, dev->net,
- "MAC address set to random addr");
- }
-
- addr_lo = addr[0] | (addr[1] << 8) |
- (addr[2] << 16) | (addr[3] << 24);
- addr_hi = addr[4] | (addr[5] << 8);
-
- ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
- ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+ if (!eth_platform_get_mac_address(&dev->udev->dev, addr)) {
+ /* valid address present in Device Tree */
+ netif_dbg(dev, ifup, dev->net,
+ "MAC address read from Device Tree");
+ } else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET,
+ ETH_ALEN, addr) == 0) ||
+ (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET,
+ ETH_ALEN, addr) == 0)) &&
+ is_valid_ether_addr(addr)) {
+ /* eeprom values are valid so use them */
+ netif_dbg(dev, ifup, dev->net,
+ "MAC address read from EEPROM");
} else {
/* generate random MAC */
random_ether_addr(addr);
netif_dbg(dev, ifup, dev->net,
"MAC address set to random addr");
}
+
+ addr_lo = addr[0] | (addr[1] << 8) |
+ (addr[2] << 16) | (addr[3] << 24);
+ addr_hi = addr[4] | (addr[5] << 8);
+
+ ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+ ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
}
ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
@@ -1762,6 +1813,7 @@ done:
static int lan78xx_mdio_init(struct lan78xx_net *dev)
{
+ struct device_node *node;
int ret;
dev->mdiobus = mdiobus_alloc();
@@ -1790,7 +1842,13 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
break;
}
- ret = mdiobus_register(dev->mdiobus);
+ node = of_get_child_by_name(dev->udev->dev.of_node, "mdio");
+ if (node) {
+ ret = of_mdiobus_register(dev->mdiobus, node);
+ of_node_put(node);
+ } else {
+ ret = mdiobus_register(dev->mdiobus);
+ }
if (ret) {
netdev_err(dev->net, "can't register MDIO bus\n");
goto exit1;
@@ -2003,52 +2061,91 @@ static int ksz9031rnx_fixup(struct phy_device *phydev)
return 1;
}
-static int lan78xx_phy_init(struct lan78xx_net *dev)
+static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev)
{
+ u32 buf;
int ret;
- u32 mii_adv;
+ struct fixed_phy_status fphy_status = {
+ .link = 1,
+ .speed = SPEED_1000,
+ .duplex = DUPLEX_FULL,
+ };
struct phy_device *phydev;
phydev = phy_find_first(dev->mdiobus);
if (!phydev) {
- netdev_err(dev->net, "no PHY found\n");
- return -EIO;
- }
-
- if ((dev->chipid == ID_REV_CHIP_ID_7800_) ||
- (dev->chipid == ID_REV_CHIP_ID_7850_)) {
- phydev->is_internal = true;
- dev->interface = PHY_INTERFACE_MODE_GMII;
-
- } else if (dev->chipid == ID_REV_CHIP_ID_7801_) {
+ netdev_dbg(dev->net, "PHY Not Found!! Registering Fixed PHY\n");
+ phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1,
+ NULL);
+ if (IS_ERR(phydev)) {
+ netdev_err(dev->net, "No PHY/fixed_PHY found\n");
+ return NULL;
+ }
+ netdev_dbg(dev->net, "Registered FIXED PHY\n");
+ dev->interface = PHY_INTERFACE_MODE_RGMII;
+ ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
+ MAC_RGMII_ID_TXC_DELAY_EN_);
+ ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+ ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+ buf |= HW_CFG_CLK125_EN_;
+ buf |= HW_CFG_REFCLK25_EN_;
+ ret = lan78xx_write_reg(dev, HW_CFG, buf);
+ } else {
if (!phydev->drv) {
netdev_err(dev->net, "no PHY driver found\n");
- return -EIO;
+ return NULL;
}
-
dev->interface = PHY_INTERFACE_MODE_RGMII;
-
/* external PHY fixup for KSZ9031RNX */
ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0,
ksz9031rnx_fixup);
if (ret < 0) {
- netdev_err(dev->net, "fail to register fixup\n");
- return ret;
+ netdev_err(dev->net, "Failed to register fixup for PHY_KSZ9031RNX\n");
+ return NULL;
}
/* external PHY fixup for LAN8835 */
ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0,
lan8835_fixup);
if (ret < 0) {
- netdev_err(dev->net, "fail to register fixup\n");
- return ret;
+ netdev_err(dev->net, "Failed to register fixup for PHY_LAN8835\n");
+ return NULL;
}
/* add more external PHY fixup here if needed */
phydev->is_internal = false;
- } else {
- netdev_err(dev->net, "unknown ID found\n");
- ret = -EIO;
- goto error;
+ }
+ return phydev;
+}
+
+static int lan78xx_phy_init(struct lan78xx_net *dev)
+{
+ int ret;
+ u32 mii_adv;
+ struct phy_device *phydev;
+
+ switch (dev->chipid) {
+ case ID_REV_CHIP_ID_7801_:
+ phydev = lan7801_phy_init(dev);
+ if (!phydev) {
+ netdev_err(dev->net, "lan7801: PHY Init Failed");
+ return -EIO;
+ }
+ break;
+
+ case ID_REV_CHIP_ID_7800_:
+ case ID_REV_CHIP_ID_7850_:
+ phydev = phy_find_first(dev->mdiobus);
+ if (!phydev) {
+ netdev_err(dev->net, "no PHY found\n");
+ return -EIO;
+ }
+ phydev->is_internal = true;
+ dev->interface = PHY_INTERFACE_MODE_GMII;
+ break;
+
+ default:
+ netdev_err(dev->net, "Unknown CHIP ID found\n");
+ return -EIO;
}
/* if phyirq is not set, use polling mode in phylib */
@@ -2067,6 +2164,16 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
if (ret) {
netdev_err(dev->net, "can't attach PHY to %s\n",
dev->mdiobus->id);
+ if (dev->chipid == ID_REV_CHIP_ID_7801_) {
+ if (phy_is_pseudo_fixed_link(phydev)) {
+ fixed_phy_unregister(phydev);
+ } else {
+ phy_unregister_fixup_for_uid(PHY_KSZ9031RNX,
+ 0xfffffff0);
+ phy_unregister_fixup_for_uid(PHY_LAN8835,
+ 0xfffffff0);
+ }
+ }
return -EIO;
}
@@ -2079,17 +2186,33 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control);
phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
+ if (phydev->mdio.dev.of_node) {
+ u32 reg;
+ int len;
+
+ len = of_property_count_elems_of_size(phydev->mdio.dev.of_node,
+ "microchip,led-modes",
+ sizeof(u32));
+ if (len >= 0) {
+ /* Ensure the appropriate LEDs are enabled */
+ lan78xx_read_reg(dev, HW_CFG, &reg);
+ reg &= ~(HW_CFG_LED0_EN_ |
+ HW_CFG_LED1_EN_ |
+ HW_CFG_LED2_EN_ |
+ HW_CFG_LED3_EN_);
+ reg |= (len > 0) * HW_CFG_LED0_EN_ |
+ (len > 1) * HW_CFG_LED1_EN_ |
+ (len > 2) * HW_CFG_LED2_EN_ |
+ (len > 3) * HW_CFG_LED3_EN_;
+ lan78xx_write_reg(dev, HW_CFG, reg);
+ }
+ }
+
genphy_config_aneg(phydev);
dev->fc_autoneg = phydev->autoneg;
return 0;
-
-error:
- phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
- phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
-
- return ret;
}
static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
@@ -3487,6 +3610,7 @@ static void lan78xx_disconnect(struct usb_interface *intf)
struct lan78xx_net *dev;
struct usb_device *udev;
struct net_device *net;
+ struct phy_device *phydev;
dev = usb_get_intfdata(intf);
usb_set_intfdata(intf, NULL);
@@ -3495,12 +3619,16 @@ static void lan78xx_disconnect(struct usb_interface *intf)
udev = interface_to_usbdev(intf);
net = dev->net;
+ phydev = net->phydev;
phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
phy_disconnect(net->phydev);
+ if (phy_is_pseudo_fixed_link(phydev))
+ fixed_phy_unregister(phydev);
+
unregister_netdev(net);
cancel_delayed_work_sync(&dev->wq);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 770422e953f7..f34794a76c4d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -419,46 +419,51 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq);
}
-static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
- struct xdp_buff *xdp)
+static int __virtnet_xdp_xmit(struct virtnet_info *vi,
+ struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
- unsigned int len;
+ struct xdp_frame *xdpf_sent;
struct send_queue *sq;
+ unsigned int len;
unsigned int qp;
- void *xdp_sent;
int err;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
- while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- struct page *sent_page = virt_to_head_page(xdp_sent);
+ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+ xdp_return_frame(xdpf_sent);
- put_page(sent_page);
- }
+ /* virtqueue want to use data area in-front of packet */
+ if (unlikely(xdpf->metasize > 0))
+ return -EOPNOTSUPP;
- xdp->data -= vi->hdr_len;
+ if (unlikely(xdpf->headroom < vi->hdr_len))
+ return -EOVERFLOW;
+
+ /* Make room for virtqueue hdr (also change xdpf->headroom?) */
+ xdpf->data -= vi->hdr_len;
/* Zero header and leave csum up to XDP layers */
- hdr = xdp->data;
+ hdr = xdpf->data;
memset(hdr, 0, vi->hdr_len);
+ xdpf->len += vi->hdr_len;
- sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+ sg_init_one(sq->sg, xdpf->data, xdpf->len);
- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
+ err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
if (unlikely(err))
- return false; /* Caller handle free/refcnt */
+ return -ENOSPC; /* Caller handle free/refcnt */
- return true;
+ return 0;
}
-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
struct bpf_prog *xdp_prog;
- bool sent;
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
@@ -467,10 +472,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
if (!xdp_prog)
return -ENXIO;
- sent = __virtnet_xdp_xmit(vi, xdp);
- if (!sent)
- return -ENOSPC;
- return 0;
+ return __virtnet_xdp_xmit(vi, xdpf);
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -559,7 +561,6 @@ static struct sk_buff *receive_small(struct net_device *dev,
struct page *page = virt_to_head_page(buf);
unsigned int delta = 0;
struct page *xdp_page;
- bool sent;
int err;
len -= vi->hdr_len;
@@ -568,6 +569,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
+ struct xdp_frame *xdpf;
struct xdp_buff xdp;
void *orig_data;
u32 act;
@@ -608,10 +610,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
case XDP_PASS:
/* Recalculate length in case bpf program changed it */
delta = orig_data - xdp.data;
+ len = xdp.data_end - xdp.data;
break;
case XDP_TX:
- sent = __virtnet_xdp_xmit(vi, &xdp);
- if (unlikely(!sent)) {
+ xdpf = convert_to_xdp_frame(&xdp);
+ if (unlikely(!xdpf))
+ goto err_xdp;
+ err = __virtnet_xdp_xmit(vi, xdpf);
+ if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
}
@@ -641,7 +647,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
goto err;
}
skb_reserve(skb, headroom - delta);
- skb_put(skb, len + delta);
+ skb_put(skb, len);
if (!delta) {
buf += header_offset;
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
@@ -694,7 +700,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct bpf_prog *xdp_prog;
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
- bool sent;
int err;
head_skb = NULL;
@@ -702,6 +707,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
+ struct xdp_frame *xdpf;
struct page *xdp_page;
struct xdp_buff xdp;
void *data;
@@ -756,6 +762,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
offset = xdp.data -
page_address(xdp_page) - vi->hdr_len;
+ /* recalculate len if xdp.data or xdp.data_end were
+ * adjusted
+ */
+ len = xdp.data_end - xdp.data + vi->hdr_len;
/* We can only create skb based on xdp_page. */
if (unlikely(xdp_page != page)) {
rcu_read_unlock();
@@ -766,8 +776,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
break;
case XDP_TX:
- sent = __virtnet_xdp_xmit(vi, &xdp);
- if (unlikely(!sent)) {
+ xdpf = convert_to_xdp_frame(&xdp);
+ if (unlikely(!xdpf))
+ goto err_xdp;
+ err = __virtnet_xdp_xmit(vi, xdpf);
+ if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page))
put_page(xdp_page);
@@ -1312,6 +1325,13 @@ static int virtnet_open(struct net_device *dev)
if (err < 0)
return err;
+ err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0) {
+ xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
+ return err;
+ }
+
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
}
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 0a2b180d138a..90b5f3900c22 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -48,6 +48,9 @@ static unsigned int vrf_net_id;
struct net_vrf {
struct rtable __rcu *rth;
struct rt6_info __rcu *rt6;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct fib6_table *fib6_table;
+#endif
u32 tb_id;
};
@@ -496,7 +499,6 @@ static int vrf_rt6_create(struct net_device *dev)
int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM;
struct net_vrf *vrf = netdev_priv(dev);
struct net *net = dev_net(dev);
- struct fib6_table *rt6i_table;
struct rt6_info *rt6;
int rc = -ENOMEM;
@@ -504,8 +506,8 @@ static int vrf_rt6_create(struct net_device *dev)
if (!ipv6_mod_enabled())
return 0;
- rt6i_table = fib6_new_table(net, vrf->tb_id);
- if (!rt6i_table)
+ vrf->fib6_table = fib6_new_table(net, vrf->tb_id);
+ if (!vrf->fib6_table)
goto out;
/* create a dst for routing packets out a VRF device */
@@ -513,7 +515,6 @@ static int vrf_rt6_create(struct net_device *dev)
if (!rt6)
goto out;
- rt6->rt6i_table = rt6i_table;
rt6->dst.output = vrf_output6;
rcu_assign_pointer(vrf->rt6, rt6);
@@ -946,22 +947,8 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
int flags)
{
struct net_vrf *vrf = netdev_priv(dev);
- struct fib6_table *table = NULL;
- struct rt6_info *rt6;
-
- rcu_read_lock();
-
- /* fib6_table does not have a refcnt and can not be freed */
- rt6 = rcu_dereference(vrf->rt6);
- if (likely(rt6))
- table = rt6->rt6i_table;
-
- rcu_read_unlock();
-
- if (!table)
- return NULL;
- return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
+ return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags);
}
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fab7a4db249e..aee0e60471f1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2085,9 +2085,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
local_ip = vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
md->gbp = skb->mark;
- ttl = vxlan->cfg.ttl;
- if (!ttl && vxlan_addr_multicast(dst))
- ttl = 1;
+ if (flags & VXLAN_F_TTL_INHERIT) {
+ ttl = ip_tunnel_get_ttl(old_iph, skb);
+ } else {
+ ttl = vxlan->cfg.ttl;
+ if (!ttl && vxlan_addr_multicast(dst))
+ ttl = 1;
+ }
tos = vxlan->cfg.tos;
if (tos == 1)
@@ -2709,6 +2713,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
+ [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -3254,6 +3259,12 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_VXLAN_TTL])
conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+ if (data[IFLA_VXLAN_TTL_INHERIT]) {
+ if (changelink)
+ return -EOPNOTSUPP;
+ conf->flags |= VXLAN_F_TTL_INHERIT;
+ }
+
if (data[IFLA_VXLAN_LABEL])
conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
IPV6_FLOWLABEL_MASK;
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index b3285175f20f..78ccf936d356 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -452,7 +452,6 @@ static int ptp_pch_adjtime(struct ptp_clock_info *ptp, s64 delta)
static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
{
u64 ns;
- u32 remainder;
unsigned long flags;
struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps);
struct pch_ts_regs __iomem *regs = pch_dev->regs;
@@ -461,8 +460,7 @@ static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
ns = pch_systime_read(regs);
spin_unlock_irqrestore(&pch_dev->register_lock, flags);
- ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
- ts->tv_nsec = remainder;
+ *ts = ns_to_timespec64(ns);
return 0;
}
@@ -474,8 +472,7 @@ static int ptp_pch_settime(struct ptp_clock_info *ptp,
struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps);
struct pch_ts_regs __iomem *regs = pch_dev->regs;
- ns = ts->tv_sec * 1000000000ULL;
- ns += ts->tv_nsec;
+ ns = timespec64_to_ns(ts);
spin_lock_irqsave(&pch_dev->register_lock, flags);
pch_systime_write(regs, ns);
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 0ee8f33efb54..2d9fe7e4ee40 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1928,6 +1928,8 @@ lcs_portno_store (struct device *dev, struct device_attribute *attr, const char
return -EINVAL;
/* TODO: sanity checks */
card->portno = value;
+ if (card->dev)
+ card->dev->dev_port = card->portno;
return count;
@@ -2158,6 +2160,7 @@ lcs_new_device(struct ccwgroup_device *ccwgdev)
card->dev = dev;
card->dev->ml_priv = card;
card->dev->netdev_ops = &lcs_netdev_ops;
+ card->dev->dev_port = card->portno;
memcpy(card->dev->dev_addr, card->mac, LCS_MAC_LENGTH);
#ifdef CONFIG_IP_MULTICAST
if (!lcs_check_multicast_support(card))
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 78b98b3e7efa..2a5fec55bf60 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -148,6 +148,7 @@ struct qeth_perf_stats {
unsigned int tx_csum;
unsigned int tx_lin;
unsigned int tx_linfail;
+ unsigned int rx_csum;
};
/* Routing stuff */
@@ -712,9 +713,6 @@ enum qeth_discipline_id {
struct qeth_discipline {
const struct device_type *devtype;
- void (*start_poll)(struct ccw_device *, int, unsigned long);
- qdio_handler_t *input_handler;
- qdio_handler_t *output_handler;
int (*process_rx_buffer)(struct qeth_card *card, int budget, int *done);
int (*recover)(void *ptr);
int (*setup) (struct ccwgroup_device *);
@@ -780,9 +778,9 @@ struct qeth_card {
struct qeth_card_options options;
wait_queue_head_t wait_q;
- spinlock_t vlanlock;
spinlock_t mclock;
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ struct mutex vid_list_mutex; /* vid_list */
struct list_head vid_list;
DECLARE_HASHTABLE(mac_htable, 4);
DECLARE_HASHTABLE(ip_htable, 4);
@@ -867,6 +865,32 @@ static inline int qeth_get_ip_version(struct sk_buff *skb)
}
}
+static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
+ u8 flags)
+{
+ if ((card->dev->features & NETIF_F_RXCSUM) &&
+ (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (card->options.performance_stats)
+ card->perf_stats.rx_csum++;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+}
+
+static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, int ipv)
+{
+ *flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ;
+ if ((ipv == 4 && ip_hdr(skb)->protocol == IPPROTO_UDP) ||
+ (ipv == 6 && ipv6_hdr(skb)->nexthdr == IPPROTO_UDP))
+ *flags |= QETH_HDR_EXT_UDP;
+ if (ipv == 4) {
+ /* some HW requires combined L3+L4 csum offload: */
+ *flags |= QETH_HDR_EXT_CSUM_HDR_REQ;
+ ip_hdr(skb)->check = 0;
+ }
+}
+
static inline void qeth_put_buffer_pool_entry(struct qeth_card *card,
struct qeth_buffer_pool_entry *entry)
{
@@ -879,6 +903,27 @@ static inline int qeth_is_diagass_supported(struct qeth_card *card,
return card->info.diagass_support & (__u32)cmd;
}
+int qeth_send_simple_setassparms_prot(struct qeth_card *card,
+ enum qeth_ipa_funcs ipa_func,
+ u16 cmd_code, long data,
+ enum qeth_prot_versions prot);
+/* IPv4 variant */
+static inline int qeth_send_simple_setassparms(struct qeth_card *card,
+ enum qeth_ipa_funcs ipa_func,
+ u16 cmd_code, long data)
+{
+ return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
+ data, QETH_PROT_IPV4);
+}
+
+static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card,
+ enum qeth_ipa_funcs ipa_func,
+ u16 cmd_code, long data)
+{
+ return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
+ data, QETH_PROT_IPV6);
+}
+
extern struct qeth_discipline qeth_l2_discipline;
extern struct qeth_discipline qeth_l3_discipline;
extern const struct attribute_group *qeth_generic_attr_groups[];
@@ -921,13 +966,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
struct qeth_hdr **);
void qeth_schedule_recovery(struct qeth_card *);
-void qeth_qdio_start_poll(struct ccw_device *, int, unsigned long);
int qeth_poll(struct napi_struct *napi, int budget);
-void qeth_qdio_input_handler(struct ccw_device *,
- unsigned int, unsigned int, int,
- int, unsigned long);
-void qeth_qdio_output_handler(struct ccw_device *, unsigned int,
- int, int, int, unsigned long);
void qeth_clear_ipacmd_list(struct qeth_card *);
int qeth_qdio_clear_card(struct qeth_card *, int);
void qeth_clear_working_pool_list(struct qeth_card *);
@@ -979,8 +1018,6 @@ int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot);
void qeth_trace_features(struct qeth_card *);
void qeth_close_dev(struct qeth_card *);
-int qeth_send_simple_setassparms(struct qeth_card *, enum qeth_ipa_funcs,
- __u16, long);
int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,
long,
int (*reply_cb)(struct qeth_card *,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index dffd820731f2..06415b6a8f68 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1467,13 +1467,13 @@ static int qeth_setup_card(struct qeth_card *card)
card->lan_online = 0;
card->read_or_write_problem = 0;
card->dev = NULL;
- spin_lock_init(&card->vlanlock);
spin_lock_init(&card->mclock);
spin_lock_init(&card->lock);
spin_lock_init(&card->ip_lock);
spin_lock_init(&card->thread_mask_lock);
mutex_init(&card->conf_mutex);
mutex_init(&card->discipline_mutex);
+ mutex_init(&card->vid_list_mutex);
card->thread_start_mask = 0;
card->thread_allowed_mask = 0;
card->thread_running_mask = 0;
@@ -3588,15 +3588,14 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
}
}
-void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue,
- unsigned long card_ptr)
+static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue,
+ unsigned long card_ptr)
{
struct qeth_card *card = (struct qeth_card *)card_ptr;
if (card->dev && (card->dev->flags & IFF_UP))
napi_schedule(&card->napi);
}
-EXPORT_SYMBOL_GPL(qeth_qdio_start_poll);
int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
{
@@ -3698,9 +3697,10 @@ out:
return;
}
-void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err,
- unsigned int queue, int first_elem, int count,
- unsigned long card_ptr)
+static void qeth_qdio_input_handler(struct ccw_device *ccwdev,
+ unsigned int qdio_err, int queue,
+ int first_elem, int count,
+ unsigned long card_ptr)
{
struct qeth_card *card = (struct qeth_card *)card_ptr;
@@ -3711,14 +3711,12 @@ void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err,
qeth_qdio_cq_handler(card, qdio_err, queue, first_elem, count);
else if (qdio_err)
qeth_schedule_recovery(card);
-
-
}
-EXPORT_SYMBOL_GPL(qeth_qdio_input_handler);
-void qeth_qdio_output_handler(struct ccw_device *ccwdev,
- unsigned int qdio_error, int __queue, int first_element,
- int count, unsigned long card_ptr)
+static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
+ unsigned int qdio_error, int __queue,
+ int first_element, int count,
+ unsigned long card_ptr)
{
struct qeth_card *card = (struct qeth_card *) card_ptr;
struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
@@ -3787,7 +3785,6 @@ void qeth_qdio_output_handler(struct ccw_device *ccwdev,
card->perf_stats.outbound_handler_time += qeth_get_micros() -
card->perf_stats.outbound_handler_start_time;
}
-EXPORT_SYMBOL_GPL(qeth_qdio_output_handler);
/* We cannot use outbound queue 3 for unicast packets on HiperSockets */
static inline int qeth_cut_iqd_prio(struct qeth_card *card, int queue_num)
@@ -4995,7 +4992,7 @@ static int qeth_qdio_establish(struct qeth_card *card)
goto out_free_in_sbals;
}
for (i = 0; i < card->qdio.no_in_queues; ++i)
- queue_start_poll[i] = card->discipline->start_poll;
+ queue_start_poll[i] = qeth_qdio_start_poll;
qeth_qdio_establish_cq(card, in_sbal_ptrs, queue_start_poll);
@@ -5019,8 +5016,8 @@ static int qeth_qdio_establish(struct qeth_card *card)
init_data.qib_param_field = qib_param_field;
init_data.no_input_qs = card->qdio.no_in_queues;
init_data.no_output_qs = card->qdio.no_out_queues;
- init_data.input_handler = card->discipline->input_handler;
- init_data.output_handler = card->discipline->output_handler;
+ init_data.input_handler = qeth_qdio_input_handler;
+ init_data.output_handler = qeth_qdio_output_handler;
init_data.queue_start_poll_array = queue_start_poll;
init_data.int_parm = (unsigned long) card;
init_data.input_sbal_addr_array = (void **) in_sbal_ptrs;
@@ -5204,6 +5201,11 @@ retriable:
rc = qeth_query_ipassists(card, QETH_PROT_IPV4);
if (rc == -ENOMEM)
goto out;
+ if (qeth_is_supported(card, IPA_IPV6)) {
+ rc = qeth_query_ipassists(card, QETH_PROT_IPV6);
+ if (rc == -ENOMEM)
+ goto out;
+ }
if (qeth_is_supported(card, IPA_SETADAPTERPARMS)) {
rc = qeth_query_setadapterparms(card);
if (rc < 0) {
@@ -5511,26 +5513,26 @@ int qeth_send_setassparms(struct qeth_card *card,
}
EXPORT_SYMBOL_GPL(qeth_send_setassparms);
-int qeth_send_simple_setassparms(struct qeth_card *card,
- enum qeth_ipa_funcs ipa_func,
- __u16 cmd_code, long data)
+int qeth_send_simple_setassparms_prot(struct qeth_card *card,
+ enum qeth_ipa_funcs ipa_func,
+ u16 cmd_code, long data,
+ enum qeth_prot_versions prot)
{
int rc;
int length = 0;
struct qeth_cmd_buffer *iob;
- QETH_CARD_TEXT(card, 4, "simassp4");
+ QETH_CARD_TEXT_(card, 4, "simassp%i", prot);
if (data)
length = sizeof(__u32);
- iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
- length, QETH_PROT_IPV4);
+ iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, length, prot);
if (!iob)
return -ENOMEM;
rc = qeth_send_setassparms(card, iob, length, data,
qeth_setassparms_cb, NULL);
return rc;
}
-EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms);
+EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms_prot);
static void qeth_unregister_dbf_views(void)
{
@@ -6008,7 +6010,8 @@ static struct {
{"tx lin"},
{"tx linfail"},
{"cq handler count"},
- {"cq handler time"}
+ {"cq handler time"},
+ {"rx csum"}
};
int qeth_core_get_sset_count(struct net_device *dev, int stringset)
@@ -6070,6 +6073,7 @@ void qeth_core_get_ethtool_stats(struct net_device *dev,
data[35] = card->perf_stats.tx_linfail;
data[36] = card->perf_stats.cq_cnt;
data[37] = card->perf_stats.cq_time;
+ data[38] = card->perf_stats.rx_csum;
}
EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats);
@@ -6326,14 +6330,15 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
static int qeth_ipa_checksum_run_cmd(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
__u16 cmd_code, long data,
- struct qeth_checksum_cmd *chksum_cb)
+ struct qeth_checksum_cmd *chksum_cb,
+ enum qeth_prot_versions prot)
{
struct qeth_cmd_buffer *iob;
int rc = -ENOMEM;
QETH_CARD_TEXT(card, 4, "chkdocmd");
iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
- sizeof(__u32), QETH_PROT_IPV4);
+ sizeof(__u32), prot);
if (iob)
rc = qeth_send_setassparms(card, iob, sizeof(__u32), data,
qeth_ipa_checksum_run_cmd_cb,
@@ -6341,16 +6346,17 @@ static int qeth_ipa_checksum_run_cmd(struct qeth_card *card,
return rc;
}
-static int qeth_send_checksum_on(struct qeth_card *card, int cstype)
+static int qeth_send_checksum_on(struct qeth_card *card, int cstype,
+ enum qeth_prot_versions prot)
{
- const __u32 required_features = QETH_IPA_CHECKSUM_IP_HDR |
- QETH_IPA_CHECKSUM_UDP |
- QETH_IPA_CHECKSUM_TCP;
+ u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP;
struct qeth_checksum_cmd chksum_cb;
int rc;
+ if (prot == QETH_PROT_IPV4)
+ required_features |= QETH_IPA_CHECKSUM_IP_HDR;
rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0,
- &chksum_cb);
+ &chksum_cb, prot);
if (!rc) {
if ((required_features & chksum_cb.supported) !=
required_features)
@@ -6362,37 +6368,42 @@ static int qeth_send_checksum_on(struct qeth_card *card, int cstype)
QETH_CARD_IFNAME(card));
}
if (rc) {
- qeth_send_simple_setassparms(card, cstype, IPA_CMD_ASS_STOP, 0);
+ qeth_send_simple_setassparms_prot(card, cstype,
+ IPA_CMD_ASS_STOP, 0, prot);
dev_warn(&card->gdev->dev,
- "Starting HW checksumming for %s failed, using SW checksumming\n",
- QETH_CARD_IFNAME(card));
+ "Starting HW IPv%d checksumming for %s failed, using SW checksumming\n",
+ prot, QETH_CARD_IFNAME(card));
return rc;
}
rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE,
- chksum_cb.supported, &chksum_cb);
+ chksum_cb.supported, &chksum_cb,
+ prot);
if (!rc) {
if ((required_features & chksum_cb.enabled) !=
required_features)
rc = -EIO;
}
if (rc) {
- qeth_send_simple_setassparms(card, cstype, IPA_CMD_ASS_STOP, 0);
+ qeth_send_simple_setassparms_prot(card, cstype,
+ IPA_CMD_ASS_STOP, 0, prot);
dev_warn(&card->gdev->dev,
- "Enabling HW checksumming for %s failed, using SW checksumming\n",
- QETH_CARD_IFNAME(card));
+ "Enabling HW IPv%d checksumming for %s failed, using SW checksumming\n",
+ prot, QETH_CARD_IFNAME(card));
return rc;
}
- dev_info(&card->gdev->dev, "HW Checksumming (%sbound) enabled\n",
- cstype == IPA_INBOUND_CHECKSUM ? "in" : "out");
+ dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n",
+ cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot);
return 0;
}
-static int qeth_set_ipa_csum(struct qeth_card *card, int on, int cstype)
+static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype,
+ enum qeth_prot_versions prot)
{
- int rc = (on) ? qeth_send_checksum_on(card, cstype)
- : qeth_send_simple_setassparms(card, cstype,
- IPA_CMD_ASS_STOP, 0);
+ int rc = (on) ? qeth_send_checksum_on(card, cstype, prot)
+ : qeth_send_simple_setassparms_prot(card, cstype,
+ IPA_CMD_ASS_STOP, 0,
+ prot);
return rc ? -EIO : 0;
}
@@ -6419,8 +6430,31 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on)
return rc;
}
-#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO)
+static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on)
+{
+ int rc_ipv4 = (on) ? -EOPNOTSUPP : 0;
+ int rc_ipv6;
+
+ if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM))
+ rc_ipv4 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM,
+ QETH_PROT_IPV4);
+ if (!qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6))
+ /* no/one Offload Assist available, so the rc is trivial */
+ return rc_ipv4;
+ rc_ipv6 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM,
+ QETH_PROT_IPV6);
+
+ if (on)
+ /* enable: success if any Assist is active */
+ return (rc_ipv6) ? rc_ipv4 : 0;
+
+ /* disable: failure if any Assist is still active */
+ return (rc_ipv6) ? rc_ipv6 : rc_ipv4;
+}
+
+#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \
+ NETIF_F_IPV6_CSUM)
/**
* qeth_recover_features() - Restore device features after recovery
* @dev: the recovering net_device
@@ -6455,16 +6489,19 @@ int qeth_set_features(struct net_device *dev, netdev_features_t features)
QETH_DBF_HEX(SETUP, 2, &features, sizeof(features));
if ((changed & NETIF_F_IP_CSUM)) {
- rc = qeth_set_ipa_csum(card,
- features & NETIF_F_IP_CSUM ? 1 : 0,
- IPA_OUTBOUND_CHECKSUM);
+ rc = qeth_set_ipa_csum(card, features & NETIF_F_IP_CSUM,
+ IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV4);
if (rc)
changed ^= NETIF_F_IP_CSUM;
}
- if ((changed & NETIF_F_RXCSUM)) {
- rc = qeth_set_ipa_csum(card,
- features & NETIF_F_RXCSUM ? 1 : 0,
- IPA_INBOUND_CHECKSUM);
+ if (changed & NETIF_F_IPV6_CSUM) {
+ rc = qeth_set_ipa_csum(card, features & NETIF_F_IPV6_CSUM,
+ IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV6);
+ if (rc)
+ changed ^= NETIF_F_IPV6_CSUM;
+ }
+ if (changed & NETIF_F_RXCSUM) {
+ rc = qeth_set_ipa_rx_csum(card, features & NETIF_F_RXCSUM);
if (rc)
changed ^= NETIF_F_RXCSUM;
}
@@ -6491,7 +6528,10 @@ netdev_features_t qeth_fix_features(struct net_device *dev,
QETH_DBF_TEXT(SETUP, 2, "fixfeat");
if (!qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM))
features &= ~NETIF_F_IP_CSUM;
- if (!qeth_is_supported(card, IPA_INBOUND_CHECKSUM))
+ if (!qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6))
+ features &= ~NETIF_F_IPV6_CSUM;
+ if (!qeth_is_supported(card, IPA_INBOUND_CHECKSUM) &&
+ !qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6))
features &= ~NETIF_F_RXCSUM;
if (!qeth_is_supported(card, IPA_OUTBOUND_TSO))
features &= ~NETIF_F_TSO;
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index f4d1ec0b8f5a..878e62f35169 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -246,6 +246,8 @@ enum qeth_ipa_funcs {
IPA_QUERY_ARP_ASSIST = 0x00040000L,
IPA_INBOUND_TSO = 0x00080000L,
IPA_OUTBOUND_TSO = 0x00100000L,
+ IPA_INBOUND_CHECKSUM_V6 = 0x00400000L,
+ IPA_OUTBOUND_CHECKSUM_V6 = 0x00800000L,
};
/* SETIP/DELIP IPA Command: ***************************************************/
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index ae81534de912..c3f18afb368b 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -144,6 +144,8 @@ static ssize_t qeth_dev_portno_store(struct device *dev,
goto out;
}
card->info.portno = portno;
+ if (card->dev)
+ card->dev->dev_port = portno;
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index b8079f2a65b3..a7cb37da6a21 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -17,7 +17,6 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
-#include <linux/ip.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/hashtable.h>
@@ -195,23 +194,6 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
return RTN_UNSPEC;
}
-static void qeth_l2_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr,
- struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- /* tcph->check contains already the pseudo hdr checksum
- * so just set the header flags
- */
- if (iph->protocol == IPPROTO_UDP)
- hdr->hdr.l2.flags[1] |= QETH_HDR_EXT_UDP;
- hdr->hdr.l2.flags[1] |= QETH_HDR_EXT_CSUM_TRANSP_REQ |
- QETH_HDR_EXT_CSUM_HDR_REQ;
- iph->check = 0;
- if (card->options.performance_stats)
- card->perf_stats.tx_csum++;
-}
-
static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb,
int cast_type, unsigned int data_len)
{
@@ -297,12 +279,13 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
static void qeth_l2_process_vlans(struct qeth_card *card)
{
struct qeth_vlan_vid *id;
+
QETH_CARD_TEXT(card, 3, "L2prcvln");
- spin_lock_bh(&card->vlanlock);
+ mutex_lock(&card->vid_list_mutex);
list_for_each_entry(id, &card->vid_list, list) {
qeth_l2_send_setdelvlan(card, id->vid, IPA_CMD_SETVLAN);
}
- spin_unlock_bh(&card->vlanlock);
+ mutex_unlock(&card->vid_list_mutex);
}
static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
@@ -319,7 +302,7 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
QETH_CARD_TEXT(card, 3, "aidREC");
return 0;
}
- id = kmalloc(sizeof(struct qeth_vlan_vid), GFP_ATOMIC);
+ id = kmalloc(sizeof(*id), GFP_KERNEL);
if (id) {
id->vid = vid;
rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN);
@@ -327,9 +310,9 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
kfree(id);
return rc;
}
- spin_lock_bh(&card->vlanlock);
+ mutex_lock(&card->vid_list_mutex);
list_add_tail(&id->list, &card->vid_list);
- spin_unlock_bh(&card->vlanlock);
+ mutex_unlock(&card->vid_list_mutex);
} else {
return -ENOMEM;
}
@@ -348,7 +331,7 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
QETH_CARD_TEXT(card, 3, "kidREC");
return 0;
}
- spin_lock_bh(&card->vlanlock);
+ mutex_lock(&card->vid_list_mutex);
list_for_each_entry(id, &card->vid_list, list) {
if (id->vid == vid) {
list_del(&id->list);
@@ -356,7 +339,7 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
break;
}
}
- spin_unlock_bh(&card->vlanlock);
+ mutex_unlock(&card->vid_list_mutex);
if (tmpid) {
rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN);
kfree(tmpid);
@@ -423,15 +406,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
switch (hdr->hdr.l2.id) {
case QETH_HEADER_TYPE_LAYER2:
skb->protocol = eth_type_trans(skb, skb->dev);
- if ((card->dev->features & NETIF_F_RXCSUM)
- && ((hdr->hdr.l2.flags[1] &
- (QETH_HDR_EXT_CSUM_HDR_REQ |
- QETH_HDR_EXT_CSUM_TRANSP_REQ)) ==
- (QETH_HDR_EXT_CSUM_HDR_REQ |
- QETH_HDR_EXT_CSUM_TRANSP_REQ)))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- else
- skb->ip_summed = CHECKSUM_NONE;
+ qeth_rx_csum(card, skb, hdr->hdr.l2.flags[1]);
if (skb->protocol == htons(ETH_P_802_2))
*((__u32 *)skb->cb) = ++card->seqno.pkt_seqno;
len = skb->len;
@@ -464,7 +439,6 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
static int qeth_l2_request_initial_mac(struct qeth_card *card)
{
int rc = 0;
- char vendor_pre[] = {0x02, 0x00, 0x00};
QETH_DBF_TEXT(SETUP, 2, "l2reqmac");
QETH_DBF_TEXT_(SETUP, 2, "doL2%s", CARD_BUS_ID(card));
@@ -484,16 +458,20 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
card->info.type == QETH_CARD_TYPE_OSX ||
card->info.guestlan) {
rc = qeth_setadpparms_change_macaddr(card);
- if (rc) {
- QETH_DBF_MESSAGE(2, "couldn't get MAC address on "
- "device %s: x%x\n", CARD_BUS_ID(card), rc);
- QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
- return rc;
- }
- } else {
- eth_random_addr(card->dev->dev_addr);
- memcpy(card->dev->dev_addr, vendor_pre, 3);
+ if (!rc)
+ goto out;
+ QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n",
+ CARD_BUS_ID(card), rc);
+ QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
+ /* fall back once more: */
}
+
+ /* some devices don't support a custom MAC address: */
+ if (card->info.type == QETH_CARD_TYPE_OSM ||
+ card->info.type == QETH_CARD_TYPE_OSX)
+ return (rc) ? rc : -EADDRNOTAVAIL;
+ eth_hw_addr_random(card->dev);
+
out:
QETH_DBF_HEX(SETUP, 2, card->dev->dev_addr, card->dev->addr_len);
return 0;
@@ -685,7 +663,8 @@ out:
}
static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue, int cast_type)
+ struct qeth_qdio_out_q *queue, int cast_type,
+ int ipv)
{
int push_len = sizeof(struct qeth_hdr);
unsigned int elements, nr_frags;
@@ -723,8 +702,11 @@ static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
hdr_elements = 1;
}
qeth_l2_fill_header(hdr, skb, cast_type, skb->len - push_len);
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- qeth_l2_hdr_csum(card, hdr, skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv);
+ if (card->options.performance_stats)
+ card->perf_stats.tx_csum++;
+ }
elements = qeth_get_elements_no(card, skb, hdr_elements, 0);
if (!elements) {
@@ -776,6 +758,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
{
struct qeth_card *card = dev->ml_priv;
int cast_type = qeth_l2_get_cast_type(card, skb);
+ int ipv = qeth_get_ip_version(skb);
struct qeth_qdio_out_q *queue;
int tx_bytes = skb->len;
int rc;
@@ -783,7 +766,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
if (card->qdio.do_prio_queueing || (cast_type &&
card->info.is_multicast_different))
queue = card->qdio.out_qs[qeth_get_priority_queue(card, skb,
- qeth_get_ip_version(skb), cast_type)];
+ ipv, cast_type)];
else
queue = card->qdio.out_qs[card->qdio.default_out_queue];
@@ -806,7 +789,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
rc = qeth_l2_xmit_iqd(card, skb, queue, cast_type);
break;
default:
- rc = qeth_l2_xmit_osa(card, skb, queue, cast_type);
+ rc = qeth_l2_xmit_osa(card, skb, queue, cast_type, ipv);
}
if (!rc) {
@@ -983,6 +966,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->mtu = card->info.initial_mtu;
card->dev->min_mtu = 64;
card->dev->max_mtu = ETH_MAX_MTU;
+ card->dev->dev_port = card->info.portno;
card->dev->netdev_ops = &qeth_l2_netdev_ops;
if (card->info.type == QETH_CARD_TYPE_OSN) {
card->dev->ethtool_ops = &qeth_l2_osn_ops;
@@ -1011,10 +995,15 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->hw_features |= NETIF_F_IP_CSUM;
card->dev->vlan_features |= NETIF_F_IP_CSUM;
}
- if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM)) {
- card->dev->hw_features |= NETIF_F_RXCSUM;
- card->dev->vlan_features |= NETIF_F_RXCSUM;
- }
+ }
+ if (qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6)) {
+ card->dev->hw_features |= NETIF_F_IPV6_CSUM;
+ card->dev->vlan_features |= NETIF_F_IPV6_CSUM;
+ }
+ if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM) ||
+ qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6)) {
+ card->dev->hw_features |= NETIF_F_RXCSUM;
+ card->dev->vlan_features |= NETIF_F_RXCSUM;
}
card->info.broadcast_capable = 1;
@@ -1315,9 +1304,6 @@ static int qeth_l2_control_event(struct qeth_card *card,
struct qeth_discipline qeth_l2_discipline = {
.devtype = &qeth_l2_devtype,
- .start_poll = qeth_qdio_start_poll,
- .input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
- .output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
.process_rx_buffer = qeth_l2_process_inbound_buffer,
.recover = qeth_l2_recover,
.setup = qeth_l2_probe_device,
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index c1a16a74aa83..e7fa479adf47 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -735,22 +735,6 @@ static int qeth_l3_setadapter_parms(struct qeth_card *card)
return rc;
}
-static int qeth_l3_send_simple_setassparms_ipv6(struct qeth_card *card,
- enum qeth_ipa_funcs ipa_func, __u16 cmd_code)
-{
- int rc;
- struct qeth_cmd_buffer *iob;
-
- QETH_CARD_TEXT(card, 4, "simassp6");
- iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code,
- 0, QETH_PROT_IPV6);
- if (!iob)
- return -ENOMEM;
- rc = qeth_send_setassparms(card, iob, 0, 0,
- qeth_setassparms_cb, NULL);
- return rc;
-}
-
static int qeth_l3_start_ipa_arp_processing(struct qeth_card *card)
{
int rc;
@@ -851,14 +835,6 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
QETH_CARD_TEXT(card, 3, "softipv6");
- rc = qeth_query_ipassists(card, QETH_PROT_IPV6);
- if (rc) {
- dev_err(&card->gdev->dev,
- "Activating IPv6 support for %s failed\n",
- QETH_CARD_IFNAME(card));
- return rc;
- }
-
if (card->info.type == QETH_CARD_TYPE_IQD)
goto out;
@@ -870,16 +846,16 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
QETH_CARD_IFNAME(card));
return rc;
}
- rc = qeth_l3_send_simple_setassparms_ipv6(card, IPA_IPV6,
- IPA_CMD_ASS_START);
+ rc = qeth_send_simple_setassparms_v6(card, IPA_IPV6,
+ IPA_CMD_ASS_START, 0);
if (rc) {
dev_err(&card->gdev->dev,
"Activating IPv6 support for %s failed\n",
QETH_CARD_IFNAME(card));
return rc;
}
- rc = qeth_l3_send_simple_setassparms_ipv6(card, IPA_PASSTHRU,
- IPA_CMD_ASS_START);
+ rc = qeth_send_simple_setassparms_v6(card, IPA_PASSTHRU,
+ IPA_CMD_ASS_START, 0);
if (rc) {
dev_warn(&card->gdev->dev,
"Enabling the passthrough mode for %s failed\n",
@@ -1293,91 +1269,6 @@ static void qeth_l3_add_multicast_ipv6(struct qeth_card *card)
in6_dev_put(in6_dev);
}
-static void qeth_l3_free_vlan_addresses4(struct qeth_card *card,
- unsigned short vid)
-{
- struct in_device *in_dev;
- struct in_ifaddr *ifa;
- struct qeth_ipaddr *addr;
- struct net_device *netdev;
-
- QETH_CARD_TEXT(card, 4, "frvaddr4");
-
- netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
- if (!netdev)
- return;
- in_dev = in_dev_get(netdev);
- if (!in_dev)
- return;
-
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
- if (!addr)
- goto out;
-
- spin_lock_bh(&card->ip_lock);
-
- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
- addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
- addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
- addr->type = QETH_IP_TYPE_NORMAL;
- qeth_l3_delete_ip(card, addr);
- }
-
- spin_unlock_bh(&card->ip_lock);
-
- kfree(addr);
-out:
- in_dev_put(in_dev);
-}
-
-static void qeth_l3_free_vlan_addresses6(struct qeth_card *card,
- unsigned short vid)
-{
- struct inet6_dev *in6_dev;
- struct inet6_ifaddr *ifa;
- struct qeth_ipaddr *addr;
- struct net_device *netdev;
-
- QETH_CARD_TEXT(card, 4, "frvaddr6");
-
- netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
- if (!netdev)
- return;
-
- in6_dev = in6_dev_get(netdev);
- if (!in6_dev)
- return;
-
- addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
- if (!addr)
- goto out;
-
- spin_lock_bh(&card->ip_lock);
-
- list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
- memcpy(&addr->u.a6.addr, &ifa->addr,
- sizeof(struct in6_addr));
- addr->u.a6.pfxlen = ifa->prefix_len;
- addr->type = QETH_IP_TYPE_NORMAL;
- qeth_l3_delete_ip(card, addr);
- }
-
- spin_unlock_bh(&card->ip_lock);
-
- kfree(addr);
-out:
- in6_dev_put(in6_dev);
-}
-
-static void qeth_l3_free_vlan_addresses(struct qeth_card *card,
- unsigned short vid)
-{
- rcu_read_lock();
- qeth_l3_free_vlan_addresses4(card, vid);
- qeth_l3_free_vlan_addresses6(card, vid);
- rcu_read_unlock();
-}
-
static int qeth_l3_vlan_rx_add_vid(struct net_device *dev,
__be16 proto, u16 vid)
{
@@ -1398,8 +1289,6 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
QETH_CARD_TEXT(card, 3, "kidREC");
return 0;
}
- /* unregister IP addresses of vlan device */
- qeth_l3_free_vlan_addresses(card, vid);
clear_bit(vid, card->active_vlans);
qeth_l3_set_rx_mode(dev);
return 0;
@@ -1454,17 +1343,7 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
}
- if (card->dev->features & NETIF_F_RXCSUM) {
- if ((hdr->hdr.l3.ext_flags &
- (QETH_HDR_EXT_CSUM_HDR_REQ |
- QETH_HDR_EXT_CSUM_TRANSP_REQ)) ==
- (QETH_HDR_EXT_CSUM_HDR_REQ |
- QETH_HDR_EXT_CSUM_TRANSP_REQ))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- else
- skb->ip_summed = CHECKSUM_NONE;
- } else
- skb->ip_summed = CHECKSUM_NONE;
+ qeth_rx_csum(card, skb, hdr->hdr.l3.ext_flags);
}
static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
@@ -2210,23 +2089,6 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
rcu_read_unlock();
}
-static void qeth_l3_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr,
- struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- /* tcph->check contains already the pseudo hdr checksum
- * so just set the header flags
- */
- if (iph->protocol == IPPROTO_UDP)
- hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_UDP;
- hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ |
- QETH_HDR_EXT_CSUM_HDR_REQ;
- iph->check = 0;
- if (card->options.performance_stats)
- card->perf_stats.tx_csum++;
-}
-
static void qeth_tso_fill_header(struct qeth_card *card,
struct qeth_hdr *qhdr, struct sk_buff *skb)
{
@@ -2418,8 +2280,11 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
}
}
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- qeth_l3_hdr_csum(card, hdr, new_skb);
+ if (new_skb->ip_summed == CHECKSUM_PARTIAL) {
+ qeth_tx_csum(new_skb, &hdr->hdr.l3.ext_flags, ipv);
+ if (card->options.performance_stats)
+ card->perf_stats.tx_csum++;
+ }
}
elements = use_tso ?
@@ -2620,28 +2485,32 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
(card->info.link_type == QETH_LINK_TYPE_HSTR)) {
pr_info("qeth_l3: ignoring TR device\n");
return -ENODEV;
- } else {
- card->dev = alloc_etherdev(0);
- if (!card->dev)
- return -ENODEV;
- card->dev->netdev_ops = &qeth_l3_osa_netdev_ops;
-
- /*IPv6 address autoconfiguration stuff*/
- qeth_l3_get_unique_id(card);
- if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
- card->dev->dev_id = card->info.unique_id &
- 0xffff;
-
- card->dev->hw_features |= NETIF_F_SG;
- card->dev->vlan_features |= NETIF_F_SG;
-
- if (!card->info.guestlan) {
- card->dev->features |= NETIF_F_SG;
- card->dev->hw_features |= NETIF_F_TSO |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
- card->dev->vlan_features |= NETIF_F_TSO |
- NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
- }
+ }
+
+ card->dev = alloc_etherdev(0);
+ if (!card->dev)
+ return -ENODEV;
+ card->dev->netdev_ops = &qeth_l3_osa_netdev_ops;
+
+ /*IPv6 address autoconfiguration stuff*/
+ qeth_l3_get_unique_id(card);
+ if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
+ card->dev->dev_id = card->info.unique_id & 0xffff;
+
+ card->dev->hw_features |= NETIF_F_SG;
+ card->dev->vlan_features |= NETIF_F_SG;
+
+ if (!card->info.guestlan) {
+ card->dev->features |= NETIF_F_SG;
+ card->dev->hw_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+ card->dev->vlan_features |= NETIF_F_TSO |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+ }
+
+ if (qeth_is_supported6(card, IPA_OUTBOUND_CHECKSUM_V6)) {
+ card->dev->hw_features |= NETIF_F_IPV6_CSUM;
+ card->dev->vlan_features |= NETIF_F_IPV6_CSUM;
}
} else if (card->info.type == QETH_CARD_TYPE_IQD) {
card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN,
@@ -2663,6 +2532,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
card->dev->mtu = card->info.initial_mtu;
card->dev->min_mtu = 64;
card->dev->max_mtu = ETH_MAX_MTU;
+ card->dev->dev_port = card->info.portno;
card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
@@ -2960,9 +2830,6 @@ static int qeth_l3_control_event(struct qeth_card *card,
struct qeth_discipline qeth_l3_discipline = {
.devtype = &qeth_l3_devtype,
- .start_poll = qeth_qdio_start_poll,
- .input_handler = (qdio_handler_t *) qeth_qdio_input_handler,
- .output_handler = (qdio_handler_t *) qeth_qdio_output_handler,
.process_rx_buffer = qeth_l3_process_inbound_buffer,
.recover = qeth_l3_recover,
.setup = qeth_l3_probe_device,
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c
index 026182d3b27c..224d7ddeeb76 100644
--- a/drivers/soc/ti/knav_dma.c
+++ b/drivers/soc/ti/knav_dma.c
@@ -134,6 +134,13 @@ struct knav_dma_chan {
static struct knav_dma_pool_device *kdev;
+static bool device_ready;
+bool knav_dma_device_ready(void)
+{
+ return device_ready;
+}
+EXPORT_SYMBOL_GPL(knav_dma_device_ready);
+
static bool check_config(struct knav_dma_chan *chan, struct knav_dma_cfg *cfg)
{
if (!memcmp(&chan->cfg, cfg, sizeof(*cfg)))
@@ -773,6 +780,7 @@ static int knav_dma_probe(struct platform_device *pdev)
debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL,
&knav_dma_debug_ops);
+ device_ready = true;
return ret;
}
diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
index 905b974d1bdc..56866ba4cfc4 100644
--- a/drivers/soc/ti/knav_qmss.h
+++ b/drivers/soc/ti/knav_qmss.h
@@ -292,6 +292,11 @@ struct knav_queue {
struct list_head list;
};
+enum qmss_version {
+ QMSS,
+ QMSS_66AK2G,
+};
+
struct knav_device {
struct device *dev;
unsigned base_id;
@@ -305,6 +310,7 @@ struct knav_device {
struct list_head pools;
struct list_head pdsps;
struct list_head qmgrs;
+ enum qmss_version version;
};
struct knav_range_ops {
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index 77d6b5c03aae..419365a8d1c2 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -42,6 +42,15 @@ static DEFINE_MUTEX(knav_dev_lock);
#define KNAV_QUEUE_PUSH_REG_INDEX 4
#define KNAV_QUEUE_POP_REG_INDEX 5
+/* Queue manager register indices in DTS for QMSS in K2G NAVSS.
+ * There are no status and vbusm push registers on this version
+ * of QMSS. Push registers are same as pop, So all indices above 1
+ * are to be re-defined
+ */
+#define KNAV_L_QUEUE_CONFIG_REG_INDEX 1
+#define KNAV_L_QUEUE_REGION_REG_INDEX 2
+#define KNAV_L_QUEUE_PUSH_REG_INDEX 3
+
/* PDSP register indices in DTS */
#define KNAV_QUEUE_PDSP_IRAM_REG_INDEX 0
#define KNAV_QUEUE_PDSP_REGS_REG_INDEX 1
@@ -65,6 +74,13 @@ static DEFINE_MUTEX(knav_dev_lock);
*/
const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"};
+static bool device_ready;
+bool knav_qmss_device_ready(void)
+{
+ return device_ready;
+}
+EXPORT_SYMBOL_GPL(knav_qmss_device_ready);
+
/**
* knav_queue_notify: qmss queue notfier call
*
@@ -1169,8 +1185,12 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev)
dev_dbg(kdev->dev, "linkram0: dma:%pad, virt:%p, size:%x\n",
&block->dma, block->virt, block->size);
writel_relaxed((u32)block->dma, &qmgr->reg_config->link_ram_base0);
- writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0);
-
+ if (kdev->version == QMSS_66AK2G)
+ writel_relaxed(block->size,
+ &qmgr->reg_config->link_ram_size0);
+ else
+ writel_relaxed(block->size - 1,
+ &qmgr->reg_config->link_ram_size0);
block++;
if (!block->size)
continue;
@@ -1387,42 +1407,64 @@ static int knav_queue_init_qmgrs(struct knav_device *kdev,
qmgr->reg_peek =
knav_queue_map_reg(kdev, child,
KNAV_QUEUE_PEEK_REG_INDEX);
- qmgr->reg_status =
- knav_queue_map_reg(kdev, child,
- KNAV_QUEUE_STATUS_REG_INDEX);
+
+ if (kdev->version == QMSS) {
+ qmgr->reg_status =
+ knav_queue_map_reg(kdev, child,
+ KNAV_QUEUE_STATUS_REG_INDEX);
+ }
+
qmgr->reg_config =
knav_queue_map_reg(kdev, child,
+ (kdev->version == QMSS_66AK2G) ?
+ KNAV_L_QUEUE_CONFIG_REG_INDEX :
KNAV_QUEUE_CONFIG_REG_INDEX);
qmgr->reg_region =
knav_queue_map_reg(kdev, child,
+ (kdev->version == QMSS_66AK2G) ?
+ KNAV_L_QUEUE_REGION_REG_INDEX :
KNAV_QUEUE_REGION_REG_INDEX);
+
qmgr->reg_push =
knav_queue_map_reg(kdev, child,
- KNAV_QUEUE_PUSH_REG_INDEX);
- qmgr->reg_pop =
- knav_queue_map_reg(kdev, child,
- KNAV_QUEUE_POP_REG_INDEX);
+ (kdev->version == QMSS_66AK2G) ?
+ KNAV_L_QUEUE_PUSH_REG_INDEX :
+ KNAV_QUEUE_PUSH_REG_INDEX);
+
+ if (kdev->version == QMSS) {
+ qmgr->reg_pop =
+ knav_queue_map_reg(kdev, child,
+ KNAV_QUEUE_POP_REG_INDEX);
+ }
- if (IS_ERR(qmgr->reg_peek) || IS_ERR(qmgr->reg_status) ||
+ if (IS_ERR(qmgr->reg_peek) ||
+ ((kdev->version == QMSS) &&
+ (IS_ERR(qmgr->reg_status) || IS_ERR(qmgr->reg_pop))) ||
IS_ERR(qmgr->reg_config) || IS_ERR(qmgr->reg_region) ||
- IS_ERR(qmgr->reg_push) || IS_ERR(qmgr->reg_pop)) {
+ IS_ERR(qmgr->reg_push)) {
dev_err(dev, "failed to map qmgr regs\n");
+ if (kdev->version == QMSS) {
+ if (!IS_ERR(qmgr->reg_status))
+ devm_iounmap(dev, qmgr->reg_status);
+ if (!IS_ERR(qmgr->reg_pop))
+ devm_iounmap(dev, qmgr->reg_pop);
+ }
if (!IS_ERR(qmgr->reg_peek))
devm_iounmap(dev, qmgr->reg_peek);
- if (!IS_ERR(qmgr->reg_status))
- devm_iounmap(dev, qmgr->reg_status);
if (!IS_ERR(qmgr->reg_config))
devm_iounmap(dev, qmgr->reg_config);
if (!IS_ERR(qmgr->reg_region))
devm_iounmap(dev, qmgr->reg_region);
if (!IS_ERR(qmgr->reg_push))
devm_iounmap(dev, qmgr->reg_push);
- if (!IS_ERR(qmgr->reg_pop))
- devm_iounmap(dev, qmgr->reg_pop);
devm_kfree(dev, qmgr);
continue;
}
+ /* Use same push register for pop as well */
+ if (kdev->version == QMSS_66AK2G)
+ qmgr->reg_pop = qmgr->reg_push;
+
list_add_tail(&qmgr->list, &kdev->qmgrs);
dev_info(dev, "added qmgr start queue %d, num of queues %d, reg_peek %p, reg_status %p, reg_config %p, reg_region %p, reg_push %p, reg_pop %p\n",
qmgr->start_queue, qmgr->num_queues,
@@ -1681,10 +1723,24 @@ static int knav_queue_init_queues(struct knav_device *kdev)
return 0;
}
+/* Match table for of_platform binding */
+static const struct of_device_id keystone_qmss_of_match[] = {
+ {
+ .compatible = "ti,keystone-navigator-qmss",
+ },
+ {
+ .compatible = "ti,66ak2g-navss-qm",
+ .data = (void *)QMSS_66AK2G,
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, keystone_qmss_of_match);
+
static int knav_queue_probe(struct platform_device *pdev)
{
struct device_node *node = pdev->dev.of_node;
struct device_node *qmgrs, *queue_pools, *regions, *pdsps;
+ const struct of_device_id *match;
struct device *dev = &pdev->dev;
u32 temp[2];
int ret;
@@ -1700,6 +1756,10 @@ static int knav_queue_probe(struct platform_device *pdev)
return -ENOMEM;
}
+ match = of_match_device(of_match_ptr(keystone_qmss_of_match), dev);
+ if (match && match->data)
+ kdev->version = QMSS_66AK2G;
+
platform_set_drvdata(pdev, kdev);
kdev->dev = dev;
INIT_LIST_HEAD(&kdev->queue_ranges);
@@ -1796,6 +1856,7 @@ static int knav_queue_probe(struct platform_device *pdev)
debugfs_create_file("qmss", S_IFREG | S_IRUGO, NULL, NULL,
&knav_queue_debug_ops);
+ device_ready = true;
return 0;
err:
@@ -1815,13 +1876,6 @@ static int knav_queue_remove(struct platform_device *pdev)
return 0;
}
-/* Match table for of_platform binding */
-static struct of_device_id keystone_qmss_of_match[] = {
- { .compatible = "ti,keystone-navigator-qmss", },
- {},
-};
-MODULE_DEVICE_TABLE(of, keystone_qmss_of_match);
-
static struct platform_driver keystone_qmss_driver = {
.probe = knav_queue_probe,
.remove = knav_queue_remove,
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 986058a57917..c4b49fca4871 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -32,6 +32,7 @@
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/xdp.h>
#include "vhost.h"
@@ -45,8 +46,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
#define VHOST_NET_WEIGHT 0x80000
/* Max number of packets transferred before requeueing the job.
- * Using this limit prevents one virtqueue from starving rx. */
-#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
+ * Using this limit prevents one virtqueue from starving others with small
+ * pkts.
+ */
+#define VHOST_NET_PKT_WEIGHT 256
/* MAX number of TX used buffers for outstanding zerocopy */
#define VHOST_MAX_PEND 128
@@ -181,10 +184,10 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
static int vhost_net_buf_peek_len(void *ptr)
{
- if (tun_is_xdp_buff(ptr)) {
- struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ if (tun_is_xdp_frame(ptr)) {
+ struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
- return xdp->data_end - xdp->data;
+ return xdpf->len;
}
return __skb_array_len_with_tag(ptr);
@@ -586,7 +589,7 @@ static void handle_tx(struct vhost_net *net)
vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
- unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
+ unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) {
vhost_poll_queue(&vq->poll);
break;
}
@@ -768,6 +771,7 @@ static void handle_rx(struct vhost_net *net)
struct socket *sock;
struct iov_iter fixup;
__virtio16 num_buffers;
+ int recv_pkts = 0;
mutex_lock_nested(&vq->mutex, 0);
sock = vq->private_data;
@@ -871,7 +875,8 @@ static void handle_rx(struct vhost_net *net)
if (unlikely(vq_log))
vhost_log_write(vq, vq_log, log, vhost_len);
total_len += vhost_len;
- if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+ if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+ unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) {
vhost_poll_queue(&vq->poll);
goto out;
}
diff --git a/include/dt-bindings/net/microchip-lan78xx.h b/include/dt-bindings/net/microchip-lan78xx.h
new file mode 100644
index 000000000000..0742ff075307
--- /dev/null
+++ b/include/dt-bindings/net/microchip-lan78xx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DT_BINDINGS_MICROCHIP_LAN78XX_H
+#define _DT_BINDINGS_MICROCHIP_LAN78XX_H
+
+/* LED modes for LAN7800/LAN7850 embedded PHY */
+
+#define LAN78XX_LINK_ACTIVITY 0
+#define LAN78XX_LINK_1000_ACTIVITY 1
+#define LAN78XX_LINK_100_ACTIVITY 2
+#define LAN78XX_LINK_10_ACTIVITY 3
+#define LAN78XX_LINK_100_1000_ACTIVITY 4
+#define LAN78XX_LINK_10_1000_ACTIVITY 5
+#define LAN78XX_LINK_10_100_ACTIVITY 6
+#define LAN78XX_DUPLEX_COLLISION 8
+#define LAN78XX_COLLISION 9
+#define LAN78XX_ACTIVITY 10
+#define LAN78XX_AUTONEG_FAULT 12
+#define LAN78XX_FORCE_LED_OFF 14
+#define LAN78XX_FORCE_LED_ON 15
+
+#endif
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 469b20e1dd7e..38ebbc61ed99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -22,6 +22,8 @@ struct perf_event;
struct bpf_prog;
struct bpf_map;
struct sock;
+struct seq_file;
+struct btf;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
@@ -44,10 +46,14 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
+ void (*map_seq_show_elem)(struct bpf_map *map, void *key,
+ struct seq_file *m);
+ int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
+ u32 key_type_id, u32 value_type_id);
};
struct bpf_map {
- /* 1st cacheline with read-mostly members of which some
+ /* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
@@ -63,10 +69,13 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
+ u32 btf_key_id;
+ u32 btf_value_id;
+ struct btf *btf;
bool unpriv_array;
- /* 7 bytes hole */
+ /* 55 bytes hole */
- /* 2nd cacheline with misc members to avoid false sharing
+ /* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
@@ -101,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
return container_of(map, struct bpf_offloaded_map, map);
}
+static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
+{
+ return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+}
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* function argument constraints */
diff --git a/include/linux/btf.h b/include/linux/btf.h
new file mode 100644
index 000000000000..a966dc6d61ee
--- /dev/null
+++ b/include/linux/btf.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef _LINUX_BTF_H
+#define _LINUX_BTF_H 1
+
+#include <linux/types.h>
+
+struct btf;
+struct btf_type;
+union bpf_attr;
+
+extern const struct file_operations btf_fops;
+
+void btf_put(struct btf *btf);
+int btf_new_fd(const union bpf_attr *attr);
+struct btf *btf_get_by_fd(int fd);
+int btf_get_info_by_fd(const struct btf *btf,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+/* Figure out the size of a type_id. If type_id is a modifier
+ * (e.g. const), it will be resolved to find out the type with size.
+ *
+ * For example:
+ * In describing "const void *", type_id is "const" and "const"
+ * refers to "void *". The return type will be "void *".
+ *
+ * If type_id is a simple "int", then return type will be "int".
+ *
+ * @btf: struct btf object
+ * @type_id: Find out the size of type_id. The type_id of the return
+ * type is set to *type_id.
+ * @ret_size: It can be NULL. If not NULL, the size of the return
+ * type is set to *ret_size.
+ * Return: The btf_type (resolved to another type with size info if needed).
+ * NULL is returned if type_id itself does not have size info
+ * (e.g. void) or it cannot be resolved to another type that
+ * has size info.
+ * *type_id and *ret_size will not be changed in the
+ * NULL return case.
+ */
+const struct btf_type *btf_type_id_size(const struct btf *btf,
+ u32 *type_id,
+ u32 *ret_size);
+void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
+ struct seq_file *m);
+
+#endif
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b32cd2062f18..f8a2245b70ac 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -312,6 +312,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
* by kernel. Returns a negative error code or zero.
* @get_fecparam: Get the network device Forward Error Correction parameters.
* @set_fecparam: Set the network device Forward Error Correction parameters.
+ * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
+ * This is only useful if the device maintains PHY statistics and
+ * cannot use the standard PHY library helpers.
*
* All operations are optional (i.e. the function pointer may be set
* to %NULL) and callers must take this into account. Callers must
@@ -407,5 +410,7 @@ struct ethtool_ops {
struct ethtool_fecparam *);
int (*set_fecparam)(struct net_device *,
struct ethtool_fecparam *);
+ void (*get_ethtool_phy_stats)(struct net_device *,
+ struct ethtool_stats *, u64 *);
};
#endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fc4e8f91b03d..4da8b2308174 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -30,6 +30,7 @@ struct sock;
struct seccomp_data;
struct bpf_prog_aux;
struct xdp_rxq_info;
+struct xdp_buff;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -500,14 +501,6 @@ struct bpf_skb_data_end {
void *data_end;
};
-struct xdp_buff {
- void *data;
- void *data_end;
- void *data_meta;
- void *data_hard_start;
- struct xdp_rxq_info *rxq;
-};
-
struct sk_msg_buff {
void *data;
void *data_end;
@@ -772,21 +765,6 @@ int xdp_do_redirect(struct net_device *dev,
struct bpf_prog *prog);
void xdp_do_flush_map(void);
-/* Drivers not supporting XDP metadata can use this helper, which
- * rejects any room expansion for metadata as a result.
- */
-static __always_inline void
-xdp_set_data_meta_invalid(struct xdp_buff *xdp)
-{
- xdp->data_meta = xdp->data + 1;
-}
-
-static __always_inline bool
-xdp_data_meta_unsupported(const struct xdp_buff *xdp)
-{
- return unlikely(xdp->data_meta > xdp->data);
-}
-
void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 02639ebea2f0..585d27182425 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -93,11 +93,39 @@ static inline bool br_multicast_router(const struct net_device *dev)
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
bool br_vlan_enabled(const struct net_device *dev);
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid);
+int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo);
#else
static inline bool br_vlan_enabled(const struct net_device *dev)
{
return false;
}
+
+static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+{
+ return -1;
+}
+
+static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
+{
+ return -1;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_BRIDGE)
+struct net_device *br_fdb_find_port(const struct net_device *br_dev,
+ const unsigned char *addr,
+ __u16 vid);
+#else
+static inline struct net_device *
+br_fdb_find_port(const struct net_device *br_dev,
+ const unsigned char *addr,
+ __u16 vid)
+{
+ return NULL;
+}
#endif
#endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 4cb7aeeafce0..2e55e4cdbd8a 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -21,7 +21,7 @@ struct macvlan_dev {
struct hlist_node hlist;
struct macvlan_port *port;
struct net_device *lowerdev;
- void *fwd_priv;
+ void *accel_priv;
struct vlan_pcpu_stats __percpu *pcpu_stats;
DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
@@ -61,10 +61,6 @@ extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack);
-extern void macvlan_count_rx(const struct macvlan_dev *vlan,
- unsigned int len, bool success,
- bool multicast);
-
extern void macvlan_dellink(struct net_device *dev, struct list_head *head);
extern int macvlan_link_register(struct rtnl_link_ops *ops);
@@ -86,4 +82,27 @@ macvlan_dev_real_dev(const struct net_device *dev)
}
#endif
+static inline void *macvlan_accel_priv(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->accel_priv;
+}
+
+static inline bool macvlan_supports_dest_filter(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->mode == MACVLAN_MODE_PRIVATE ||
+ macvlan->mode == MACVLAN_MODE_VEPA ||
+ macvlan->mode == MACVLAN_MODE_BRIDGE;
+}
+
+static inline int macvlan_release_l2fw_offload(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ macvlan->accel_priv = NULL;
+ return dev_uc_add(macvlan->lowerdev, dev->dev_addr);
+}
#endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index fd00170b494f..3d2996dc7d85 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,7 +22,7 @@
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
-bool tun_is_xdp_buff(void *ptr);
+bool tun_is_xdp_frame(void *ptr);
void *tun_xdp_to_ptr(void *ptr);
void *tun_ptr_to_xdp(void *ptr);
void tun_ptr_free(void *ptr);
@@ -39,7 +39,7 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
{
return ERR_PTR(-EINVAL);
}
-static inline bool tun_is_xdp_buff(void *ptr)
+static inline bool tun_is_xdp_frame(void *ptr)
{
return false;
}
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index a8ac9cfa014c..5d71e8a8500f 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -33,8 +33,6 @@ struct mdiobb_ops {
struct mdiobb_ctrl {
const struct mdiobb_ops *ops;
- /* reset callback */
- int (*reset)(struct mii_bus *bus);
};
/* The returned bus is not yet registered with the phy layer. */
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
new file mode 100644
index 000000000000..cea443a672cb
--- /dev/null
+++ b/include/linux/mdio-gpio.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_MDIO_GPIO_H
+#define __LINUX_MDIO_GPIO_H
+
+#define MDIO_GPIO_MDC 0
+#define MDIO_GPIO_MDIO 1
+#define MDIO_GPIO_MDO 2
+
+#endif
diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
index 8f9c90379732..8c40128af240 100644
--- a/include/linux/microchipphy.h
+++ b/include/linux/microchipphy.h
@@ -70,6 +70,9 @@
#define LAN88XX_MMD3_CHIP_ID (32877)
#define LAN88XX_MMD3_CHIP_REV (32878)
+/* Registers specific to the LAN7800/LAN7850 embedded phy */
+#define LAN78XX_PHY_LED_MODE_SELECT (0x1D)
+
/* DSP registers */
#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG (0x806A)
#define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_ (0x2000)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1aad455538f4..b8918a1da11f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -356,22 +356,6 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 reserved_at_6[0x1a];
};
-struct mlx5_ifc_ipv4_layout_bits {
- u8 reserved_at_0[0x60];
-
- u8 ipv4[0x20];
-};
-
-struct mlx5_ifc_ipv6_layout_bits {
- u8 ipv6[16][0x8];
-};
-
-union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
- struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
- struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
- u8 reserved_at_0[0x80];
-};
-
struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 smac_47_16[0x20];
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index ec052491ba3d..193091537cb6 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,12 +32,29 @@
#ifndef MLX5_IFC_FPGA_H
#define MLX5_IFC_FPGA_H
+struct mlx5_ifc_ipv4_layout_bits {
+ u8 reserved_at_0[0x60];
+
+ u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+ u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+ struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+ struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+ u8 reserved_at_0[0x80];
+};
+
enum {
MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
};
enum {
MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2,
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3,
};
struct mlx5_ifc_fpga_shell_caps_bits {
@@ -370,6 +387,27 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_tls_extended_cap_bits {
+ u8 aes_gcm_128[0x1];
+ u8 aes_gcm_256[0x1];
+ u8 reserved_at_2[0x1e];
+ u8 reserved_at_20[0x20];
+ u8 context_capacity_total[0x20];
+ u8 context_capacity_rx[0x20];
+ u8 context_capacity_tx[0x20];
+ u8 reserved_at_a0[0x10];
+ u8 tls_counter_size[0x10];
+ u8 tls_counters_addr_low[0x20];
+ u8 tls_counters_addr_high[0x20];
+ u8 rx[0x1];
+ u8 tx[0x1];
+ u8 tls_v12[0x1];
+ u8 tls_v13[0x1];
+ u8 lro[0x1];
+ u8 ipv6[0x1];
+ u8 reserved_at_106[0x1a];
+};
+
struct mlx5_ifc_ipsec_extended_cap_bits {
u8 encapsulation[0x20];
@@ -519,4 +557,43 @@ struct mlx5_ifc_fpga_ipsec_sa {
__be16 reserved2;
} __packed;
+enum fpga_tls_cmds {
+ CMD_SETUP_STREAM = 0x1001,
+ CMD_TEARDOWN_STREAM = 0x1002,
+};
+
+#define MLX5_TLS_1_2 (0)
+
+#define MLX5_TLS_ALG_AES_GCM_128 (0)
+#define MLX5_TLS_ALG_AES_GCM_256 (1)
+
+struct mlx5_ifc_tls_cmd_bits {
+ u8 command_type[0x20];
+ u8 ipv6[0x1];
+ u8 direction_sx[0x1];
+ u8 tls_version[0x2];
+ u8 reserved[0x1c];
+ u8 swid[0x20];
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+ u8 tls_rcd_sn[0x40];
+ u8 tcp_sn[0x20];
+ u8 tls_implicit_iv[0x20];
+ u8 tls_xor_iv[0x40];
+ u8 encryption_key[0x100];
+ u8 alg[4];
+ u8 reserved2[0x1c];
+ u8 reserved3[0x4a0];
+};
+
+struct mlx5_ifc_tls_resp_bits {
+ u8 syndrome[0x20];
+ u8 stream_id[0x20];
+ u8 reserverd[0x40];
+};
+
+#define MLX5_TLS_COMMAND_SIZE (0x100)
+
#endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/net.h b/include/linux/net.h
index 2248a052061d..6554d3ba4396 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -197,6 +197,7 @@ struct proto_ops {
int offset, size_t size, int flags);
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
size_t size);
+ int (*set_rcvlowat)(struct sock *sk, int val);
};
#define DECLARE_SOCKADDR(type, dst, src) \
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index 29ed8fd6379a..db99240d00bd 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -103,11 +103,12 @@ enum {
#define NET_DIM_PARAMS_NUM_PROFILES 5
/* Adaptive moderation profiles */
#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
-#define NET_DIM_EQE_PROFILES { \
+#define NET_DIM_RX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
@@ -115,7 +116,7 @@ enum {
{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
}
-#define NET_DIM_CQE_PROFILES { \
+#define NET_DIM_RX_CQE_PROFILES { \
{2, 256}, \
{8, 128}, \
{16, 64}, \
@@ -123,32 +124,68 @@ enum {
{64, 64} \
}
+#define NET_DIM_TX_EQE_PROFILES { \
+ {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
+}
+
+#define NET_DIM_TX_CQE_PROFILES { \
+ {5, 128}, \
+ {8, 64}, \
+ {16, 32}, \
+ {32, 32}, \
+ {64, 32} \
+}
+
static const struct net_dim_cq_moder
-profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
- NET_DIM_EQE_PROFILES,
- NET_DIM_CQE_PROFILES,
+rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_RX_EQE_PROFILES,
+ NET_DIM_RX_CQE_PROFILES,
};
-static inline struct net_dim_cq_moder net_dim_get_profile(u8 cq_period_mode,
- int ix)
+static const struct net_dim_cq_moder
+tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_TX_EQE_PROFILES,
+ NET_DIM_TX_CQE_PROFILES,
+};
+
+static inline struct net_dim_cq_moder
+net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
- struct net_dim_cq_moder cq_moder;
+ struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
- cq_moder = profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
-static inline struct net_dim_cq_moder net_dim_get_def_profile(u8 rx_cq_period_mode)
+static inline struct net_dim_cq_moder
+net_dim_get_def_rx_moderation(u8 cq_period_mode)
+{
+ u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
+
+ return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
+}
+
+static inline struct net_dim_cq_moder
+net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
- int default_profile_ix;
+ struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
+
+ cq_moder.cq_period_mode = cq_period_mode;
+ return cq_moder;
+}
- if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE)
- default_profile_ix = NET_DIM_DEF_PROFILE_CQE;
- else /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */
- default_profile_ix = NET_DIM_DEF_PROFILE_EQE;
+static inline struct net_dim_cq_moder
+net_dim_get_def_tx_moderation(u8 cq_period_mode)
+{
+ u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
- return net_dim_get_profile(rx_cq_period_mode, default_profile_ix);
+ return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
static inline bool net_dim_on_top(struct net_dim *dim)
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 35b79f47a13d..c87c3a3453c1 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -55,8 +55,9 @@ enum {
NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */
NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */
+ NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
- NETIF_F_GSO_UDP_BIT,
+ NETIF_F_GSO_UDP_L4_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
@@ -77,6 +78,7 @@ enum {
NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */
NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */
NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
+ NETIF_F_HW_TLS_TX_BIT, /* Hardware TLS TX offload */
NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */
NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */
@@ -147,6 +149,8 @@ enum {
#define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM)
#define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT)
#define NETIF_F_HW_TLS_RECORD __NETIF_F(HW_TLS_RECORD)
+#define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4)
+#define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX)
#define for_each_netdev_feature(mask_addr, bit) \
for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
@@ -216,6 +220,7 @@ enum {
NETIF_F_GSO_GRE_CSUM | \
NETIF_F_GSO_IPXIP4 | \
NETIF_F_GSO_IPXIP6 | \
+ NETIF_F_GSO_UDP_L4 | \
NETIF_F_GSO_UDP_TUNNEL | \
NETIF_F_GSO_UDP_TUNNEL_CSUM)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf44503ea81a..46dcb5f7522f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -865,6 +865,26 @@ struct xfrmdev_ops {
};
#endif
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+enum tls_offload_ctx_dir {
+ TLS_OFFLOAD_CTX_DIR_RX,
+ TLS_OFFLOAD_CTX_DIR_TX,
+};
+
+struct tls_crypto_info;
+struct tls_context;
+
+struct tlsdev_ops {
+ int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn);
+ void (*tls_dev_del)(struct net_device *netdev,
+ struct tls_context *ctx,
+ enum tls_offload_ctx_dir direction);
+};
+#endif
+
struct dev_ifalias {
struct rcu_head rcuhead;
char ifalias[];
@@ -1165,7 +1185,7 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
+ * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
* This function is used to submit a XDP packet for transmit on a
* netdevice.
* void (*ndo_xdp_flush)(struct net_device *dev);
@@ -1356,7 +1376,7 @@ struct net_device_ops {
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
int (*ndo_xdp_xmit)(struct net_device *dev,
- struct xdp_buff *xdp);
+ struct xdp_frame *xdp);
void (*ndo_xdp_flush)(struct net_device *dev);
};
@@ -1750,6 +1770,10 @@ struct net_device {
const struct xfrmdev_ops *xfrmdev_ops;
#endif
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+ const struct tlsdev_ops *tlsdev_ops;
+#endif
+
const struct header_ops *header_ops;
unsigned int flags;
@@ -3213,19 +3237,6 @@ static inline int netif_set_xps_queue(struct net_device *dev,
}
#endif
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues);
-
-/*
- * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
- * as a distribution range limit for the returned value.
- */
-static inline u16 skb_tx_hash(const struct net_device *dev,
- struct sk_buff *skb)
-{
- return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
-}
-
/**
* netif_is_multiqueue - test if device has multiple transmit queues
* @dev: network device
@@ -4186,6 +4197,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
diff --git a/include/linux/phy.h b/include/linux/phy.h
index f0b5870a6d40..073235e70442 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1068,6 +1068,52 @@ int __init mdio_bus_init(void);
void mdio_bus_exit(void);
#endif
+/* Inline function for use within net/core/ethtool.c (built-in) */
+static inline int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data)
+{
+ if (!phydev->drv)
+ return -EIO;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_strings(phydev, data);
+ mutex_unlock(&phydev->lock);
+
+ return 0;
+}
+
+static inline int phy_ethtool_get_sset_count(struct phy_device *phydev)
+{
+ int ret;
+
+ if (!phydev->drv)
+ return -EIO;
+
+ if (phydev->drv->get_sset_count &&
+ phydev->drv->get_strings &&
+ phydev->drv->get_stats) {
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->get_sset_count(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static inline int phy_ethtool_get_stats(struct phy_device *phydev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ if (!phydev->drv)
+ return -EIO;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_stats(phydev, stats, data);
+ mutex_unlock(&phydev->lock);
+
+ return 0;
+}
+
extern struct bus_type mdio_bus_type;
struct mdio_board_info {
diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h
deleted file mode 100644
index 11f00cdabe3d..000000000000
--- a/include/linux/platform_data/mdio-gpio.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * MDIO-GPIO bus platform data structures
- *
- * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __LINUX_MDIO_GPIO_H
-#define __LINUX_MDIO_GPIO_H
-
-#include <linux/mdio-bitbang.h>
-
-struct mdio_gpio_platform_data {
- /* GPIO numbers for bus pins */
- unsigned int mdc;
- unsigned int mdio;
- unsigned int mdo;
-
- bool mdc_active_low;
- bool mdio_active_low;
- bool mdo_active_low;
-
- u32 phy_mask;
- u32 phy_ignore_ta_mask;
- int irqs[PHY_MAX_ADDR];
- /* reset callback */
- int (*reset)(struct mii_bus *bus);
-};
-
-#endif /* __LINUX_MDIO_GPIO_H */
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 147d08ccf813..7f9756fe9180 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -352,6 +352,7 @@ struct qed_eth_ops {
int (*configure_arfs_searcher)(struct qed_dev *cdev,
enum qed_filter_config_mode mode);
int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle);
+ int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac);
};
const struct qed_eth_ops *qed_get_eth_ops(void);
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b5b2bc9eacca..e53f9c7c2809 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -159,6 +159,9 @@ struct qed_dcbx_get {
enum qed_nvm_images {
QED_NVM_IMAGE_ISCSI_CFG,
QED_NVM_IMAGE_FCOE_CFG,
+ QED_NVM_IMAGE_NVM_CFG1,
+ QED_NVM_IMAGE_DEFAULT_CFG,
+ QED_NVM_IMAGE_NVM_META,
};
struct qed_link_eee_params {
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 1f8ad121eb43..4e1f535c2034 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -836,9 +836,8 @@ out:
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhashtable_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
@@ -866,9 +865,8 @@ static inline int rhashtable_insert_fast(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhltable_insert_key(
struct rhltable *hlt, const void *key, struct rhlist_head *list,
@@ -890,9 +888,8 @@ static inline int rhltable_insert_key(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhltable_insert(
struct rhltable *hlt, struct rhlist_head *list,
@@ -922,9 +919,8 @@ static inline int rhltable_insert(
*
* It is safe to call this function from atomic context.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*/
static inline int rhashtable_lookup_insert_fast(
struct rhashtable *ht, struct rhash_head *obj,
@@ -981,9 +977,8 @@ static inline void *rhashtable_lookup_get_insert_fast(
*
* Lookups may occur in parallel with hashtable mutations and resizing.
*
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
*
* Returns zero on success.
*/
@@ -1134,8 +1129,8 @@ static inline int __rhashtable_remove_fast(
* walk the bucket chain upon removal. The removal operation is thus
* considerable slow if the hash table is not correctly sized.
*
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
*
* Returns zero on success, -ENOENT if the entry could not be found.
*/
@@ -1156,8 +1151,8 @@ static inline int rhashtable_remove_fast(
* walk the bucket chain upon removal. The removal operation is thus
* considerable slow if the hash table is not correctly sized.
*
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%
*
* Returns zero on success, -ENOENT if the entry could not be found.
*/
@@ -1273,8 +1268,9 @@ static inline int rhashtable_walk_init(struct rhashtable *ht,
* For a completely stable walk you should construct your own data
* structure outside the hash table.
*
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
*
* You must call rhashtable_walk_exit after this function returns.
*/
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9065477ed255..908d66e55b14 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -573,6 +573,8 @@ enum {
SKB_GSO_ESP = 1 << 15,
SKB_GSO_UDP = 1 << 16,
+
+ SKB_GSO_UDP_L4 = 1 << 17,
};
#if BITS_PER_LONG > 32
@@ -1032,6 +1034,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
+void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
gfp_t gfp_mask, bool fclone);
@@ -3131,6 +3134,7 @@ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len)
return skb->data;
}
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
/**
* pskb_trim_rcsum - trim received skb and update checksum
* @skb: buffer to trim
@@ -3144,9 +3148,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
{
if (likely(len >= skb->len))
return 0;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- return __pskb_trim(skb, len);
+ return pskb_trim_rcsum_slow(skb, len);
}
static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 66693bc4c6ad..7127ec301537 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -167,6 +167,8 @@ struct knav_dma_desc {
void *knav_dma_open_channel(struct device *dev, const char *name,
struct knav_dma_cfg *config);
void knav_dma_close_channel(void *channel);
+int knav_dma_get_flow(void *channel);
+bool knav_dma_device_ready(void);
#else
static inline void *knav_dma_open_channel(struct device *dev, const char *name,
struct knav_dma_cfg *config)
@@ -176,6 +178,16 @@ static inline void *knav_dma_open_channel(struct device *dev, const char *name,
static inline void knav_dma_close_channel(void *channel)
{}
+static inline int knav_dma_get_flow(void *channel)
+{
+ return -EINVAL;
+}
+
+static inline bool knav_dma_device_ready(void)
+{
+ return false;
+}
+
#endif
#endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */
diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h
index 9f0ebb3bad27..9745df6ed9d3 100644
--- a/include/linux/soc/ti/knav_qmss.h
+++ b/include/linux/soc/ti/knav_qmss.h
@@ -86,5 +86,6 @@ int knav_pool_desc_map(void *ph, void *desc, unsigned size,
void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz);
dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt);
void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma);
+bool knav_qmss_device_ready(void);
#endif /* __SOC_TI_KNAV_QMSS_H__ */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8f4c54986f97..807776928cb8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -228,7 +228,7 @@ struct tcp_sock {
unused:2;
u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */
- unused1 : 1,
+ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
repair : 1,
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
u8 repair_queue;
@@ -281,6 +281,7 @@ struct tcp_sock {
* receiver in Recovery. */
u32 prr_out; /* Total number of pkts sent during Recovery. */
u32 delivered; /* Total data packets delivered incl. rexmits */
+ u32 delivered_ce; /* Like the above but only ECE marked packets */
u32 lost; /* Total data packets lost incl. rexmits */
u32 app_limited; /* limited until "delivered" reaches this val */
u64 first_tx_mstamp; /* start of window send phase */
diff --git a/include/linux/udp.h b/include/linux/udp.h
index eaea63bc79bb..ca840345571b 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -55,6 +55,7 @@ struct udp_sock {
* when the socket is uncorked.
*/
__u16 len; /* total length of pending frames */
+ __u16 gso_size;
/*
* Fields specific to UDP-Lite.
*/
@@ -87,6 +88,8 @@ struct udp_sock {
int forward_deficit;
};
+#define UDP_MAX_SEGMENTS (1 << 6UL)
+
static inline struct udp_sock *udp_sk(const struct sock *sk)
{
return (struct udp_sock *)sk;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 378d601258be..8312cc25a3af 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -308,6 +308,20 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
}
/**
+ * __in6_dev_get_safely - get inet6_dev pointer from netdevice
+ * @dev: network device
+ *
+ * This is a safer version of __in6_dev_get
+ */
+static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev)
+{
+ if (likely(dev))
+ return rcu_dereference_rtnl(dev->ip6_ptr);
+ else
+ return NULL;
+}
+
+/**
* in6_dev_get - get inet6_dev pointer from netdevice
* @dev: network device
*
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index b9a3beca0ce4..84b3785d0e66 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -12,6 +12,10 @@
#ifndef __NET_AX88796_PLAT_H
#define __NET_AX88796_PLAT_H
+struct sk_buff;
+struct net_device;
+struct platform_device;
+
#define AXFLG_HAS_EEPROM (1<<0)
#define AXFLG_MAC_FROMDEV (1<<1) /* device already has MAC */
#define AXFLG_HAS_93CX6 (1<<2) /* use eeprom_93cx6 driver */
@@ -26,6 +30,16 @@ struct ax_plat_data {
u32 *reg_offsets; /* register offsets */
u8 *mac_addr; /* MAC addr (only used when
AXFLG_MAC_FROMPLATFORM is used */
+
+ /* uses default ax88796 buffer if set to NULL */
+ void (*block_output)(struct net_device *dev, int count,
+ const unsigned char *buf, int star_page);
+ void (*block_input)(struct net_device *dev, int count,
+ struct sk_buff *skb, int ring_offset);
+ /* returns nonzero if a pending interrupt request might by caused by
+ * the ax88786. Handles all interrupts if set to NULL
+ */
+ int (*check_irq)(struct platform_device *pdev);
};
#endif /* __NET_AX88796_PLAT_H */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 60fb4ec8ba61..462e9741b210 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -356,10 +356,13 @@ struct dsa_switch_ops {
/*
* ethtool hardware statistics.
*/
- void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
+ void (*get_strings)(struct dsa_switch *ds, int port,
+ u32 stringset, uint8_t *data);
void (*get_ethtool_stats)(struct dsa_switch *ds,
int port, uint64_t *data);
- int (*get_sset_count)(struct dsa_switch *ds, int port);
+ int (*get_sset_count)(struct dsa_switch *ds, int port, int sset);
+ void (*get_ethtool_phy_stats)(struct dsa_switch *ds,
+ int port, uint64_t *data);
/*
* ethtool Wake-on-LAN
@@ -588,4 +591,9 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
#define BRCM_TAG_GET_PORT(v) ((v) >> 8)
#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+
+int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
+int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data);
+int dsa_port_get_phy_sset_count(struct dsa_port *dp);
+
#endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e5cfcfc7dd93..b473df5b9512 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -75,7 +75,8 @@ struct fib_rules_ops {
int (*configure)(struct fib_rule *,
struct sk_buff *,
struct fib_rule_hdr *,
- struct nlattr **);
+ struct nlattr **,
+ struct netlink_ext_ack *);
int (*delete)(struct fib_rule *);
int (*compare)(struct fib_rule *,
struct fib_rule_hdr *,
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d4088d1a688d..db389253dc2a 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,7 +64,7 @@ struct inet6_ifaddr {
struct delayed_work dad_work;
struct inet6_dev *idev;
- struct rt6_info *rt;
+ struct fib6_info *rt;
struct hlist_node addr_lst;
struct list_head if_list;
@@ -143,8 +143,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
- struct inet6_dev *aca_idev;
- struct rt6_info *aca_rt;
+ struct fib6_info *aca_rt;
struct ifacaddr6 *aca_next;
int aca_users;
refcount_t aca_refcnt;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b68fea022a82..2ab6667275df 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
+ * @icsk_clean_acked Clean acked data hook
* @icsk_listen_portaddr_node hash to the portaddr listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
@@ -102,6 +103,7 @@ struct inet_connection_sock {
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void *icsk_ulp_data;
+ void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:6,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 0a671c32d6b9..83d5b3c2ac42 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -147,6 +147,7 @@ struct inet_cork {
__u8 ttl;
__s16 tos;
char priority;
+ __u16 gso_size;
};
struct inet_cork_full {
diff --git a/include/net/ip.h b/include/net/ip.h
index ecffd843e7b8..bada1f1f871e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__u8 ttl;
__s16 tos;
char priority;
+ __u16 gso_size;
};
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
@@ -171,7 +172,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable **rtp,
- unsigned int flags);
+ struct inet_cork *cork, unsigned int flags);
static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
{
@@ -396,6 +397,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
}
+int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
+ u32 *metrics);
+
u32 ip_idents_reserve(u32 hash, int segs);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 5e86fd9dc857..1af450d4e923 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -38,6 +38,7 @@
#endif
struct rt6_info;
+struct fib6_info;
struct fib6_config {
u32 fc_table;
@@ -74,12 +75,12 @@ struct fib6_node {
#ifdef CONFIG_IPV6_SUBTREES
struct fib6_node __rcu *subtree;
#endif
- struct rt6_info __rcu *leaf;
+ struct fib6_info __rcu *leaf;
__u16 fn_bit; /* bit key */
__u16 fn_flags;
int fn_sernum;
- struct rt6_info __rcu *rr_ptr;
+ struct fib6_info __rcu *rr_ptr;
struct rcu_head rcu;
};
@@ -94,11 +95,6 @@ struct fib6_gc_args {
#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
-struct mx6_config {
- const u32 *mx;
- DECLARE_BITMAP(mx_valid, RTAX_MAX);
-};
-
/*
* routing information
*
@@ -127,56 +123,71 @@ struct rt6_exception {
#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
#define FIB6_MAX_DEPTH 5
-struct rt6_info {
- struct dst_entry dst;
- struct rt6_info __rcu *rt6_next;
- struct rt6_info *from;
+struct fib6_nh {
+ struct in6_addr nh_gw;
+ struct net_device *nh_dev;
+ struct lwtunnel_state *nh_lwtstate;
- /*
- * Tail elements of dst_entry (__refcnt etc.)
- * and these elements (rarely used in hot path) are in
- * the same cache line.
- */
- struct fib6_table *rt6i_table;
- struct fib6_node __rcu *rt6i_node;
+ unsigned int nh_flags;
+ atomic_t nh_upper_bound;
+ int nh_weight;
+};
- struct in6_addr rt6i_gateway;
+struct fib6_info {
+ struct fib6_table *fib6_table;
+ struct fib6_info __rcu *rt6_next;
+ struct fib6_node __rcu *fib6_node;
/* Multipath routes:
- * siblings is a list of rt6_info that have the the same metric/weight,
+ * siblings is a list of fib6_info that have the the same metric/weight,
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
- struct list_head rt6i_siblings;
- unsigned int rt6i_nsiblings;
- atomic_t rt6i_nh_upper_bound;
+ struct list_head fib6_siblings;
+ unsigned int fib6_nsiblings;
- atomic_t rt6i_ref;
+ atomic_t fib6_ref;
+ unsigned long expires;
+ struct dst_metrics *fib6_metrics;
+#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
- unsigned int rt6i_nh_flags;
+ struct rt6key fib6_dst;
+ u32 fib6_flags;
+ struct rt6key fib6_src;
+ struct rt6key fib6_prefsrc;
- /* These are in a separate cache line. */
- struct rt6key rt6i_dst ____cacheline_aligned_in_smp;
- u32 rt6i_flags;
+ struct rt6_info * __percpu *rt6i_pcpu;
+ struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
+
+ u32 fib6_metric;
+ u8 fib6_protocol;
+ u8 fib6_type;
+ u8 exception_bucket_flushed:1,
+ should_flush:1,
+ dst_nocount:1,
+ dst_nopolicy:1,
+ dst_host:1,
+ unused:3;
+
+ struct fib6_nh fib6_nh;
+};
+
+struct rt6_info {
+ struct dst_entry dst;
+ struct fib6_info __rcu *from;
+
+ struct rt6key rt6i_dst;
struct rt6key rt6i_src;
+ struct in6_addr rt6i_gateway;
+ struct inet6_dev *rt6i_idev;
+ u32 rt6i_flags;
struct rt6key rt6i_prefsrc;
struct list_head rt6i_uncached;
struct uncached_list *rt6i_uncached_list;
- struct inet6_dev *rt6i_idev;
- struct rt6_info * __percpu *rt6i_pcpu;
- struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
- u32 rt6i_metric;
- u32 rt6i_pmtu;
/* more non-fragment space at head required */
- int rt6i_nh_weight;
unsigned short rt6i_nfheader_len;
- u8 rt6i_protocol;
- u8 exception_bucket_flushed:1,
- should_flush:1,
- unused:6;
};
#define for_each_fib6_node_rt_rcu(fn) \
@@ -192,27 +203,24 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
return ((struct rt6_info *)dst)->rt6i_idev;
}
-static inline void rt6_clean_expires(struct rt6_info *rt)
+static inline void fib6_clean_expires(struct fib6_info *f6i)
{
- rt->rt6i_flags &= ~RTF_EXPIRES;
- rt->dst.expires = 0;
+ f6i->fib6_flags &= ~RTF_EXPIRES;
+ f6i->expires = 0;
}
-static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
+static inline void fib6_set_expires(struct fib6_info *f6i,
+ unsigned long expires)
{
- rt->dst.expires = expires;
- rt->rt6i_flags |= RTF_EXPIRES;
+ f6i->expires = expires;
+ f6i->fib6_flags |= RTF_EXPIRES;
}
-static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
+static inline bool fib6_check_expired(const struct fib6_info *f6i)
{
- struct rt6_info *rt;
-
- for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
- if (rt && rt != rt0)
- rt0->dst.expires = rt->dst.expires;
- dst_set_expires(&rt0->dst, timeout);
- rt0->rt6i_flags |= RTF_EXPIRES;
+ if (f6i->fib6_flags & RTF_EXPIRES)
+ return time_after(jiffies, f6i->expires);
+ return false;
}
/* Function to safely get fn->sernum for passed in rt
@@ -220,14 +228,13 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
* Return true if we can get cookie safely
* Return false if not
*/
-static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
- u32 *cookie)
+static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
+ u32 *cookie)
{
struct fib6_node *fn;
bool status = false;
- rcu_read_lock();
- fn = rcu_dereference(rt->rt6i_node);
+ fn = rcu_dereference(f6i->fib6_node);
if (fn) {
*cookie = fn->fn_sernum;
@@ -236,19 +243,22 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
status = true;
}
- rcu_read_unlock();
return status;
}
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
+ struct fib6_info *from;
u32 cookie = 0;
- if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
- rt = rt->from;
+ rcu_read_lock();
+
+ from = rcu_dereference(rt->from);
+ if (from && (rt->rt6i_flags & RTF_PCPU ||
+ unlikely(!list_empty(&rt->rt6i_uncached))))
+ fib6_get_cookie_safe(from, &cookie);
- rt6_get_cookie_safe(rt, &cookie);
+ rcu_read_unlock();
return cookie;
}
@@ -262,20 +272,18 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct fib6_info *f6i);
-static inline void rt6_hold(struct rt6_info *rt)
+static inline void fib6_info_hold(struct fib6_info *f6i)
{
- atomic_inc(&rt->rt6i_ref);
+ atomic_inc(&f6i->fib6_ref);
}
-static inline void rt6_release(struct rt6_info *rt)
+static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (atomic_dec_and_test(&rt->rt6i_ref)) {
- rt6_free_pcpu(rt);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
+ if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
+ fib6_info_destroy(f6i);
}
enum fib6_walk_state {
@@ -291,7 +299,7 @@ enum fib6_walk_state {
struct fib6_walker {
struct list_head lh;
struct fib6_node *root, *node;
- struct rt6_info *leaf;
+ struct fib6_info *leaf;
enum fib6_walk_state state;
unsigned int skip;
unsigned int count;
@@ -355,7 +363,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
- struct rt6_info *rt;
+ struct fib6_info *rt;
};
/*
@@ -377,15 +385,19 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *saddr, int src_len,
bool exact_match);
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
void *arg);
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc,
- struct netlink_ext_ack *extack);
-int fib6_del(struct rt6_info *rt, struct nl_info *info);
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+ struct nl_info *info, struct netlink_ext_ack *extack);
+int fib6_del(struct fib6_info *rt, struct nl_info *info);
+
+static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
+{
+ return f6i->fib6_nh.nh_dev;
+}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
void fib6_run_gc(unsigned long expires, struct net *net, bool force);
@@ -408,8 +420,14 @@ void __net_exit fib6_notifier_exit(struct net *net);
unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
-void fib6_update_sernum(struct rt6_info *rt);
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
+void fib6_update_sernum(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
+
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
+static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
+{
+ return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
+}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 08b132381984..8df4ff798b04 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,9 +66,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
-static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
- return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+ return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
RTF_GATEWAY;
}
@@ -100,29 +100,29 @@ void ip6_route_cleanup(void);
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
-int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
-int ip6_ins_rt(struct rt6_info *);
-int ip6_del_rt(struct rt6_info *);
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+int ip6_ins_rt(struct net *net, struct fib6_info *f6i);
+int ip6_del_rt(struct net *net, struct fib6_info *f6i);
-void rt6_flush_exceptions(struct rt6_info *rt);
-int rt6_remove_exception_rt(struct rt6_info *rt);
-void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+void rt6_flush_exceptions(struct fib6_info *f6i);
+void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
unsigned long now);
-static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
+static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i,
const struct in6_addr *daddr,
unsigned int prefs,
struct in6_addr *saddr)
{
- struct inet6_dev *idev =
- rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
int err = 0;
- if (rt && rt->rt6i_prefsrc.plen)
- *saddr = rt->rt6i_prefsrc.addr;
- else
- err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
- daddr, prefs, saddr);
+ if (f6i && f6i->fib6_prefsrc.plen) {
+ *saddr = f6i->fib6_prefsrc.addr;
+ } else {
+ struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL;
+
+ err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr);
+ }
return err;
}
@@ -137,8 +137,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
void fib6_force_start_gc(struct net *net);
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
- const struct in6_addr *addr, bool anycast);
+struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev,
+ const struct in6_addr *addr, bool anycast,
+ gfp_t gfp_flags);
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags);
@@ -147,9 +148,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
* support functions for ND
*
*/
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
+ const struct in6_addr *addr,
struct net_device *dev);
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
+ const struct in6_addr *gwaddr,
struct net_device *dev, unsigned int pref);
void rt6_purge_dflt_routers(struct net *net);
@@ -174,14 +177,14 @@ struct rt6_rtnl_dump_arg {
struct net *net;
};
-int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
void rt6_disable_ip(struct net_device *dev, unsigned long event);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
-void rt6_multipath_rebalance(struct rt6_info *rt);
+void rt6_multipath_rebalance(struct fib6_info *f6i);
void rt6_uncached_list_add(struct rt6_info *rt);
void rt6_uncached_list_del(struct rt6_info *rt);
@@ -269,12 +272,14 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
return daddr;
}
-static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
+static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
- return a->dst.dev == b->dst.dev &&
- a->rt6i_idev == b->rt6i_idev &&
- ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
- !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
+ return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
+ ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
+ !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
}
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+ struct net_device *dev, struct sk_buff *skb,
+ const void *daddr);
#endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 540a4b4417bf..751646adc769 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -379,6 +379,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0;
}
+static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return iph->ttl;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return ((const struct ipv6hdr *)iph)->hop_limit;
+ else
+ return 0;
+}
+
/* Propogate ECN bits out */
static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
const struct sk_buff *skb)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 836f31af1369..0a872a7c33c8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -298,6 +298,7 @@ struct ipcm6_cookie {
__s16 tclass;
__s8 dontfrag;
struct ipv6_txoptions *opt;
+ __u16 gso_size;
};
static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
@@ -950,6 +951,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
void *from, int length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
+ struct inet_cork_full *cork,
const struct sockcm_cookie *sockc);
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
@@ -1044,8 +1046,6 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
-int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e421f86af043..6c1eecd56a4d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -246,6 +246,7 @@ static inline void *neighbour_priv(const struct neighbour *n)
#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
+#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
#define NEIGH_UPDATE_F_ISROUTER 0x40000000
#define NEIGH_UPDATE_F_ADMIN 0x80000000
@@ -526,5 +527,21 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
} while (read_seqretry(&n->ha_lock, seq));
}
-
+static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+ int *notify)
+{
+ u8 ndm_flags = 0;
+
+ if (!(flags & NEIGH_UPDATE_F_ADMIN))
+ return;
+
+ ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if (ndm_flags & NTF_EXT_LEARNED)
+ neigh->flags |= NTF_EXT_LEARNED;
+ else
+ neigh->flags &= ~NTF_EXT_LEARNED;
+ *notify = 1;
+ }
+}
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c29f09cfc9d7..c978a31b0f84 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 {
int max_hbh_opts_cnt;
int max_dst_opts_len;
int max_hbh_opts_len;
+ int seg6_flowlabel;
};
struct netns_ipv6 {
@@ -60,7 +61,8 @@ struct netns_ipv6 {
#endif
struct xt_table *ip6table_nat;
#endif
- struct rt6_info *ip6_null_entry;
+ struct fib6_info *fib6_null_entry;
+ struct rt6_info *ip6_null_entry;
struct rt6_statistics *rt6_stats;
struct timer_list ip6_fib_timer;
struct hlist_head *fib_table_hash;
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
new file mode 100644
index 000000000000..c79087153148
--- /dev/null
+++ b/include/net/page_pool.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool.h
+ * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+/**
+ * DOC: page_pool allocator
+ *
+ * This page_pool allocator is optimized for the XDP mode that
+ * uses one-frame-per-page, but have fallbacks that act like the
+ * regular page allocator APIs.
+ *
+ * Basic use involve replacing alloc_pages() calls with the
+ * page_pool_alloc_pages() call. Drivers should likely use
+ * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
+ *
+ * If page_pool handles DMA mapping (use page->private), then API user
+ * is responsible for invoking page_pool_put_page() once. In-case of
+ * elevated refcnt, the DMA state is released, assuming other users of
+ * the page will eventually call put_page().
+ *
+ * If no DMA mapping is done, then it can act as shim-layer that
+ * fall-through to alloc_page. As no state is kept on the page, the
+ * regular put_page() call is sufficient.
+ */
+#ifndef _NET_PAGE_POOL_H
+#define _NET_PAGE_POOL_H
+
+#include <linux/mm.h> /* Needed by ptr_ring */
+#include <linux/ptr_ring.h>
+#include <linux/dma-direction.h>
+
+#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
+#define PP_FLAG_ALL PP_FLAG_DMA_MAP
+
+/*
+ * Fast allocation side cache array/stack
+ *
+ * The cache size and refill watermark is related to the network
+ * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
+ * ring is usually refilled and the max consumed elements will be 64,
+ * thus a natural max size of objects needed in the cache.
+ *
+ * Keeping room for more objects, is due to XDP_DROP use-case. As
+ * XDP_DROP allows the opportunity to recycle objects directly into
+ * this array, as it shares the same softirq/NAPI protection. If
+ * cache is already full (or partly full) then the XDP_DROP recycles
+ * would have to take a slower code path.
+ */
+#define PP_ALLOC_CACHE_SIZE 128
+#define PP_ALLOC_CACHE_REFILL 64
+struct pp_alloc_cache {
+ u32 count;
+ void *cache[PP_ALLOC_CACHE_SIZE];
+};
+
+struct page_pool_params {
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid; /* Numa node id to allocate from pages from */
+ struct device *dev; /* device, for DMA pre-mapping purposes */
+ enum dma_data_direction dma_dir; /* DMA mapping direction */
+};
+
+struct page_pool {
+ struct rcu_head rcu;
+ struct page_pool_params p;
+
+ /*
+ * Data structure for allocation side
+ *
+ * Drivers allocation side usually already perform some kind
+ * of resource protection. Piggyback on this protection, and
+ * require driver to protect allocation side.
+ *
+ * For NIC drivers this means, allocate a page_pool per
+ * RX-queue. As the RX-queue is already protected by
+ * Softirq/BH scheduling and napi_schedule. NAPI schedule
+ * guarantee that a single napi_struct will only be scheduled
+ * on a single CPU (see napi_schedule).
+ */
+ struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
+
+ /* Data structure for storing recycled pages.
+ *
+ * Returning/freeing pages is more complicated synchronization
+ * wise, because free's can happen on remote CPUs, with no
+ * association with allocation resource.
+ *
+ * Use ptr_ring, as it separates consumer and producer
+ * effeciently, it a way that doesn't bounce cache-lines.
+ *
+ * TODO: Implement bulk return pages into this structure.
+ */
+ struct ptr_ring ring;
+};
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_pages(pool, gfp);
+}
+
+struct page_pool *page_pool_create(const struct page_pool_params *params);
+
+void page_pool_destroy(struct page_pool *pool);
+
+/* Never call this directly, use helpers below */
+void __page_pool_put_page(struct page_pool *pool,
+ struct page *page, bool allow_direct);
+
+static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+#ifdef CONFIG_PAGE_POOL
+ __page_pool_put_page(pool, page, false);
+#endif
+}
+/* Very limited use-cases allow recycle direct */
+static inline void page_pool_recycle_direct(struct page_pool *pool,
+ struct page *page)
+{
+ __page_pool_put_page(pool, page, true);
+}
+
+static inline bool is_page_pool_compiled_in(void)
+{
+#ifdef CONFIG_PAGE_POOL
+ return true;
+#else
+ return false;
+#endif
+}
+
+#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 20ff237c5eb2..86f034b524d4 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -254,11 +254,10 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
#define SCTP_TSN_MAP_SIZE 4096
/* We will not record more than this many duplicate TSNs between two
- * SACKs. The minimum PMTU is 576. Remove all the headers and there
- * is enough room for 131 duplicate reports. Round down to the
+ * SACKs. The minimum PMTU is 512. Remove all the headers and there
+ * is enough room for 117 duplicate reports. Round down to the
* nearest power of 2.
*/
-enum { SCTP_MIN_PMTU = 576 };
enum { SCTP_MAX_DUP_TSNS = 16 };
enum { SCTP_MAX_GABS = 16 };
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 28b996d63490..f66d44350007 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -428,32 +428,6 @@ static inline int sctp_list_single_entry(struct list_head *head)
return (head->next != head) && (head->next == head->prev);
}
-/* Break down data chunks at this point. */
-static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
-{
- struct sctp_sock *sp = sctp_sk(asoc->base.sk);
- struct sctp_af *af = sp->pf->af;
- int frag = pmtu;
-
- frag -= af->ip_options_len(asoc->base.sk);
- frag -= af->net_header_len;
- frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
-
- if (asoc->user_frag)
- frag = min_t(int, frag, asoc->user_frag);
-
- frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
- sctp_datachk_len(&asoc->stream)));
-
- return frag;
-}
-
-static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
-{
- sctp_assoc_sync_pmtu(asoc);
- asoc->pmtu_pending = 0;
-}
-
static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
{
return !list_empty(&chunk->list);
@@ -607,17 +581,29 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *
return t->dst;
}
-static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+/* Calculate max payload size given a MTU, or the total overhead if
+ * given MTU is zero
+ */
+static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
+ __u32 mtu, __u32 extra)
{
- __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
- SCTP_DEFAULT_MINSEGMENT);
+ __u32 overhead = sizeof(struct sctphdr) + extra;
- if (t->pathmtu == pmtu)
- return true;
+ if (sp)
+ overhead += sp->pf->af->net_header_len;
+ else
+ overhead += sizeof(struct ipv6hdr);
- t->pathmtu = pmtu;
+ if (WARN_ON_ONCE(mtu && mtu <= overhead))
+ mtu = overhead;
- return false;
+ return mtu ? mtu - overhead : overhead;
+}
+
+static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
+{
+ return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
+ SCTP_DEFAULT_MINSEGMENT));
}
#endif /* __net_sctp_h__ */
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2d0e782c9055..5ef1bad81ef5 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -207,7 +207,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
int len, __u8 flags, gfp_t gfp);
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
const __u32 lowest_tsn);
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc);
struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
@@ -215,7 +215,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
struct sctp_chunk *sctp_make_shutdown_complete(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
+int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
const size_t hint);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a0ec462bc1a9..ebf809eed33a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2091,16 +2091,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
enum sctp_transport_cmd command,
sctp_sn_error_t error);
struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
-struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
- struct net *,
- const union sctp_addr *,
- const union sctp_addr *);
void sctp_assoc_migrate(struct sctp_association *, struct sock *);
int sctp_assoc_update(struct sctp_association *old,
struct sctp_association *new);
__u32 sctp_association_get_next_tsn(struct sctp_association *);
+void sctp_assoc_update_frag_point(struct sctp_association *asoc);
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu);
void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
diff --git a/include/net/sock.h b/include/net/sock.h
index 74d725fdbe0f..3c568b36ee36 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -481,6 +481,11 @@ struct sock {
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
struct rcu_head sk_rcu;
@@ -2332,6 +2337,22 @@ static inline bool sk_fullsock(const struct sock *sk)
return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
}
+/* Checks if this SKB belongs to an HW offloaded socket
+ * and whether any SW fallbacks are required based on dev.
+ */
+static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
+ struct net_device *dev)
+{
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sock *sk = skb->sk;
+
+ if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
+ skb = sk->sk_validate_xmit_skb(sk, dev, skb);
+#endif
+
+ return skb;
+}
+
/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV
* SYNACK messages can be attached to either ones (depending on SYNCOOKIE)
*/
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 39bc855d7fee..d574ce63bf22 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -155,6 +155,7 @@ struct switchdev_notifier_fdb_info {
struct switchdev_notifier_info info; /* must be first */
const unsigned char *addr;
u16 vid;
+ bool added_by_user;
};
static inline struct net_device *
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c9b3768b350..cf803fe0fb86 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -402,6 +402,10 @@ void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
+int tcp_set_rcvlowat(struct sock *sk, int val);
+void tcp_data_ready(struct sock *sk);
+int tcp_mmap(struct file *file, struct socket *sock,
+ struct vm_area_struct *vma);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
@@ -2101,4 +2105,12 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+void clean_acked_data_enable(struct inet_connection_sock *icsk,
+ void (*cad)(struct sock *sk, u32 ack_seq));
+void clean_acked_data_disable(struct inet_connection_sock *icsk);
+
+#endif
+
#endif /* _TCP_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index b400d0bb7448..ee78f339b4b3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -83,21 +83,10 @@ struct tls_device {
void (*unhash)(struct tls_device *device, struct sock *sk);
};
-struct tls_sw_context {
+struct tls_sw_context_tx {
struct crypto_aead *aead_send;
- struct crypto_aead *aead_recv;
struct crypto_wait async_wait;
- /* Receive context */
- struct strparser strp;
- void (*saved_data_ready)(struct sock *sk);
- unsigned int (*sk_poll)(struct file *file, struct socket *sock,
- struct poll_table_struct *wait);
- struct sk_buff *recv_pkt;
- u8 control;
- bool decrypted;
-
- /* Sending context */
char aad_space[TLS_AAD_SPACE_SIZE];
unsigned int sg_plaintext_size;
@@ -114,6 +103,50 @@ struct tls_sw_context {
struct scatterlist sg_aead_out[2];
};
+struct tls_sw_context_rx {
+ struct crypto_aead *aead_recv;
+ struct crypto_wait async_wait;
+
+ struct strparser strp;
+ void (*saved_data_ready)(struct sock *sk);
+ unsigned int (*sk_poll)(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+ struct sk_buff *recv_pkt;
+ u8 control;
+ bool decrypted;
+};
+
+struct tls_record_info {
+ struct list_head list;
+ u32 end_seq;
+ int len;
+ int num_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct tls_offload_context {
+ struct crypto_aead *aead_send;
+ spinlock_t lock; /* protects records list */
+ struct list_head records_list;
+ struct tls_record_info *open_record;
+ struct tls_record_info *retransmit_hint;
+ u64 hint_record_sn;
+ u64 unacked_record_sn;
+
+ struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
+ void (*sk_destruct)(struct sock *sk);
+ u8 driver_state[];
+ /* The TLS layer reserves room for driver specific state
+ * Currently the belief is that there is not enough
+ * driver specific state to justify another layer of indirection
+ */
+#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE \
+ (ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) + \
+ TLS_DRIVER_STATE_SIZE)
+
enum {
TLS_PENDING_CLOSED_RECORD
};
@@ -138,9 +171,15 @@ struct tls_context {
struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
};
- void *priv_ctx;
+ struct list_head list;
+ struct net_device *netdev;
+ refcount_t refcount;
+
+ void *priv_ctx_tx;
+ void *priv_ctx_rx;
- u8 conf:3;
+ u8 tx_conf:3;
+ u8 rx_conf:3;
struct cipher_context tx;
struct cipher_context rx;
@@ -178,7 +217,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
-void tls_sw_free_resources(struct sock *sk);
+void tls_sw_free_resources_tx(struct sock *sk);
+void tls_sw_free_resources_rx(struct sock *sk);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
unsigned int tls_sw_poll(struct file *file, struct socket *sock,
@@ -187,9 +227,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
-void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
-void tls_icsk_clean_acked(struct sock *sk);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_device_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+void tls_device_sk_destruct(struct sock *sk);
+void tls_device_init(void);
+void tls_device_cleanup(void);
+struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+ u32 seq, u64 *p_record_sn);
+
+static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
+{
+ return rec->len == 0;
+}
+
+static inline u32 tls_record_start_seq(struct tls_record_info *rec)
+{
+ return rec->end_seq - rec->len;
+}
+
+void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
int tls_push_sg(struct sock *sk, struct tls_context *ctx,
struct scatterlist *sg, u16 first_offset,
int flags);
@@ -226,6 +285,13 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
return tls_ctx->pending_open_record_frags;
}
+static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
+{
+ return sk_fullsock(sk) &&
+ /* matches smp_store_release in tls_set_device_offload */
+ smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+}
+
static inline void tls_err_abort(struct sock *sk, int err)
{
sk->sk_err = err;
@@ -298,16 +364,22 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk)
return icsk->icsk_ulp_data;
}
-static inline struct tls_sw_context *tls_sw_ctx(
+static inline struct tls_sw_context_rx *tls_sw_ctx_rx(
const struct tls_context *tls_ctx)
{
- return (struct tls_sw_context *)tls_ctx->priv_ctx;
+ return (struct tls_sw_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
+ const struct tls_context *tls_ctx)
+{
+ return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
}
static inline struct tls_offload_context *tls_offload_ctx(
const struct tls_context *tls_ctx)
{
- return (struct tls_offload_context *)tls_ctx->priv_ctx;
+ return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
}
int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
@@ -315,4 +387,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
void tls_register_device(struct tls_device *device);
void tls_unregister_device(struct tls_device *device);
+struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+
+int tls_sw_fallback_init(struct sock *sk,
+ struct tls_offload_context *offload_ctx,
+ struct tls_crypto_info *crypto_info);
+
#endif /* _TLS_OFFLOAD_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 0676b272f6ac..05990746810e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -174,6 +174,10 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
+struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features,
+ unsigned int mss, __sum16 check);
+
static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
{
struct udphdr *uh;
@@ -269,6 +273,7 @@ int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udp_push_pending_frames(struct sock *sk);
void udp_flush_pending_frames(struct sock *sk);
+int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index ad73d8b3fcc2..b99a02ae3934 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -262,6 +262,7 @@ struct vxlan_dev {
#define VXLAN_F_COLLECT_METADATA 0x2000
#define VXLAN_F_GPE 0x4000
#define VXLAN_F_IPV6_LINKLOCAL 0x8000
+#define VXLAN_F_TTL_INHERIT 0x10000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b2362ddfa694..137ad5f9f40f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -33,16 +33,99 @@
* also mandatory during RX-ring setup.
*/
+enum xdp_mem_type {
+ MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
+ MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */
+ MEM_TYPE_PAGE_POOL,
+ MEM_TYPE_MAX,
+};
+
+struct xdp_mem_info {
+ u32 type; /* enum xdp_mem_type, but known size type */
+ u32 id;
+};
+
+struct page_pool;
+
struct xdp_rxq_info {
struct net_device *dev;
u32 queue_index;
u32 reg_state;
+ struct xdp_mem_info mem;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+struct xdp_buff {
+ void *data;
+ void *data_end;
+ void *data_meta;
+ void *data_hard_start;
+ struct xdp_rxq_info *rxq;
+};
+
+struct xdp_frame {
+ void *data;
+ u16 len;
+ u16 headroom;
+ u16 metasize;
+ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
+ * while mem info is valid on remote CPU.
+ */
+ struct xdp_mem_info mem;
+ struct net_device *dev_rx; /* used by cpumap */
+};
+
+/* Convert xdp_buff to xdp_frame */
+static inline
+struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdp_frame;
+ int metasize;
+ int headroom;
+
+ /* Assure headroom is available for storing info */
+ headroom = xdp->data - xdp->data_hard_start;
+ metasize = xdp->data - xdp->data_meta;
+ metasize = metasize > 0 ? metasize : 0;
+ if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
+ return NULL;
+
+ /* Store info in top of packet */
+ xdp_frame = xdp->data_hard_start;
+
+ xdp_frame->data = xdp->data;
+ xdp_frame->len = xdp->data_end - xdp->data;
+ xdp_frame->headroom = headroom - sizeof(*xdp_frame);
+ xdp_frame->metasize = metasize;
+
+ /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
+ xdp_frame->mem = xdp->rxq->mem;
+
+ return xdp_frame;
+}
+
+void xdp_return_frame(struct xdp_frame *xdpf);
+
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ enum xdp_mem_type type, void *allocator);
+
+/* Drivers not supporting XDP metadata can use this helper, which
+ * rejects any room expansion for metadata as a result.
+ */
+static __always_inline void
+xdp_set_data_meta_invalid(struct xdp_buff *xdp)
+{
+ xdp->data_meta = xdp->data + 1;
+}
+
+static __always_inline bool
+xdp_data_meta_unsupported(const struct xdp_buff *xdp)
+{
+ return unlikely(xdp->data_meta > xdp->data);
+}
#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 878b2be7ce77..c1a5284713b8 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -10,6 +10,7 @@
#include <linux/tracepoint.h>
#include <net/ipv6.h>
#include <net/tcp.h>
+#include <linux/sock_diag.h>
#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
do { \
@@ -113,7 +114,7 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
*/
DECLARE_EVENT_CLASS(tcp_event_sk,
- TP_PROTO(const struct sock *sk),
+ TP_PROTO(struct sock *sk),
TP_ARGS(sk),
@@ -125,6 +126,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
__array(__u8, daddr_v6, 16)
+ __field(__u64, sock_cookie)
),
TP_fast_assign(
@@ -144,73 +146,36 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+
+ __entry->sock_cookie = sock_gen_cookie(sk);
),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+ TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx",
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
- __entry->saddr_v6, __entry->daddr_v6)
+ __entry->saddr_v6, __entry->daddr_v6,
+ __entry->sock_cookie)
);
DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
- TP_PROTO(const struct sock *sk),
+ TP_PROTO(struct sock *sk),
TP_ARGS(sk)
);
DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock,
- TP_PROTO(const struct sock *sk),
+ TP_PROTO(struct sock *sk),
TP_ARGS(sk)
);
-TRACE_EVENT(tcp_set_state,
-
- TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
-
- TP_ARGS(sk, oldstate, newstate),
-
- TP_STRUCT__entry(
- __field(const void *, skaddr)
- __field(int, oldstate)
- __field(int, newstate)
- __field(__u16, sport)
- __field(__u16, dport)
- __array(__u8, saddr, 4)
- __array(__u8, daddr, 4)
- __array(__u8, saddr_v6, 16)
- __array(__u8, daddr_v6, 16)
- ),
-
- TP_fast_assign(
- struct inet_sock *inet = inet_sk(sk);
- __be32 *p32;
-
- __entry->skaddr = sk;
- __entry->oldstate = oldstate;
- __entry->newstate = newstate;
-
- __entry->sport = ntohs(inet->inet_sport);
- __entry->dport = ntohs(inet->inet_dport);
+DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
- p32 = (__be32 *) __entry->saddr;
- *p32 = inet->inet_saddr;
+ TP_PROTO(struct sock *sk),
- p32 = (__be32 *) __entry->daddr;
- *p32 = inet->inet_daddr;
-
- TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
- sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
- ),
-
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
- __entry->sport, __entry->dport,
- __entry->saddr, __entry->daddr,
- __entry->saddr_v6, __entry->daddr_v6,
- show_tcp_state_name(__entry->oldstate),
- show_tcp_state_name(__entry->newstate))
+ TP_ARGS(sk)
);
TRACE_EVENT(tcp_retransmit_synack,
@@ -279,6 +244,7 @@ TRACE_EVENT(tcp_probe,
__field(__u32, snd_wnd)
__field(__u32, srtt)
__field(__u32, rcv_wnd)
+ __field(__u64, sock_cookie)
),
TP_fast_assign(
@@ -303,15 +269,14 @@ TRACE_EVENT(tcp_probe,
__entry->rcv_wnd = tp->rcv_wnd;
__entry->ssthresh = tcp_current_ssthresh(sk);
__entry->srtt = tp->srtt_us >> 3;
+ __entry->sock_cookie = sock_gen_cookie(sk);
),
- TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
- "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
- "rcv_wnd=%u",
+ TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
__entry->saddr, __entry->daddr, __entry->mark,
__entry->length, __entry->snd_nxt, __entry->snd_una,
__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
- __entry->srtt, __entry->rcv_wnd)
+ __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
);
#endif /* _TRACE_TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c5ec89732a8d..da77a9388947 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_cmd {
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
};
enum bpf_map_type {
@@ -279,6 +280,9 @@ union bpf_attr {
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_id; /* BTF type_id of the key */
+ __u32 btf_value_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -363,398 +367,1406 @@ union bpf_attr {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
} __attribute__((aligned(8)));
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- * Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- * Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- * Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- * Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * address *src* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* u64 bpf_ktime_get_ns(void)
- * Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- * Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- * Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- * Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- * store bytes into packet
- * @skb: pointer to skb
- * @offset: offset within packet from skb->mac_header
- * @from: pointer where to copy bytes from
- * @len: number of bytes to store into packet
- * @flags: bit 0 - if true, recompute skb->csum
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- * recompute IP checksum
- * @skb: pointer to skb
- * @offset: offset within packet where IP checksum is located
- * @from: old value of header field
- * @to: new value of header field
- * @flags: bits 0-3 - size of header field
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- * recompute TCP/UDP checksum
- * @skb: pointer to skb
- * @offset: offset within packet where TCP/UDP checksum is located
- * @from: old value of header field
- * @to: new value of header field
- * @flags: bits 0-3 - size of header field
- * bit 4 - is pseudo header
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- * jump into another BPF program
- * @ctx: context pointer passed to next program
- * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- * @index: 32-bit index inside array that selects specific program to run
- * Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- * redirect to another netdev
- * @skb: pointer to skb
- * @ifindex: ifindex of the net device
- * @flags: bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * Return: 0 on success or negative error
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
- * Return: current->tgid << 32 | current->pid
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
- * Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- * stores current->comm into buf
- * Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- * retrieve a proc's classid
- * @skb: pointer to skb
- * Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- * Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- * Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- * retrieve or populate tunnel metadata
- * @skb: pointer to skb
- * @key: pointer to 'struct bpf_tunnel_key'
- * @size: size of 'struct bpf_tunnel_key'
- * @flags: room for future extensions
- * Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- * read perf event counter value
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- * redirect to another netdev
- * @ifindex: ifindex of the net device
- * @flags:
- * cls_bpf:
- * bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * xdp_bpf:
- * all bits - reserved
- * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- * redirect to endpoint in map
- * @map: pointer to dev map
- * @key: index in map to lookup
- * @flags: --
- * Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- * retrieve a dst's tclassid
- * @skb: pointer to skb
- * Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- * output perf raw sample
- * @ctx: struct pt_regs*
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * @data: data on stack to be output as raw data
- * @size: size of data
- * Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- * walk user or kernel stack and return id
- * @ctx: struct pt_regs*
- * @map: pointer to stack_trace map
- * @flags: bits 0-7 - numer of stack frames to skip
- * bit 8 - collect user stack instead of kernel
- * bit 9 - compare stacks by hash only
- * bit 10 - if two different stacks hash into the same stackid
- * discard old
- * other bits - reserved
- * Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- * calculate csum diff
- * @from: raw from buffer
- * @from_size: length of from buffer
- * @to: raw to buffer
- * @to_size: length of to buffer
- * @seed: optional seed
- * Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- * retrieve tunnel options metadata
- * @skb: pointer to skb
- * @opt: pointer to raw tunnel option data
- * @size: size of @opt
- * Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- * populate tunnel options metadata
- * @skb: pointer to skb
- * @opt: pointer to raw tunnel option data
- * @size: size of @opt
- * Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- * Change protocol of the skb. Currently supported is v4 -> v6,
- * v6 -> v4 transitions. The helper will also resize the skb. eBPF
- * program is expected to fill the new headers via skb_store_bytes
- * and lX_csum_replace.
- * @skb: pointer to skb
- * @proto: new skb->protocol type
- * @flags: reserved
- * Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- * Change packet type of skb.
- * @skb: pointer to skb
- * @type: new skb->pkt_type type
- * Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- * Check cgroup2 membership of skb
- * @skb: pointer to skb
- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- * @index: index of the cgroup in the bpf_map
- * Return:
- * == 0 skb failed the cgroup2 descendant test
- * == 1 skb succeeded the cgroup2 descendant test
- * < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- * Retrieve and possibly recalculate skb->hash.
- * @skb: pointer to skb
- * Return: hash
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read*\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with bpf_perf_event_read_value(), which at the same time
+ * provides more features over the **bpf_perf_event_read**\ ()
+ * interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * indentifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
*
* u64 bpf_get_current_task(void)
- * Returns current task_struct
- * Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- * safely attempt to write to a location
- * @dst: destination address in userspace
- * @src: source address on stack
- * @len: number of bytes to copy
- * Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- * Check cgroup2 membership of current task
- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- * @index: index of the cgroup in the bpf_map
- * Return:
- * == 0 current failed the cgroup2 descendant test
- * == 1 current succeeded the cgroup2 descendant test
- * < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- * The helper will resize the skb to the given new size, to be used f.e.
- * with control messages.
- * @skb: pointer to skb
- * @len: new skb length
- * @flags: reserved
- * Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- * The helper will pull in non-linear data in case the skb is non-linear
- * and not all of len are part of the linear section. Only needed for
- * read/write with direct packet access.
- * @skb: pointer to skb
- * @len: len to make read/writeable
- * Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- * @skb: pointer to skb
- * @csum: csum to add
- * Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- * Invalidate current skb->hash.
- * @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- * Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- * Grows headroom of skb and adjusts MAC header offset accordingly.
- * Will extends/reallocae as required automatically.
- * May change skb data pointer and will thus invalidate any check
- * performed for direct packet access.
- * @skb: pointer to skb
- * @len: length of header to be pushed in front
- * @flags: Flags (unused for now)
- * Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- * Adjust the xdp_md.data by delta
- * @xdp_md: pointer to xdp_md
- * @delta: An positive/negative integer to be added to xdp_md.data
- * Return: 0 on success or negative on error
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- * Copy a NUL terminated string from unsafe address. In case the string
- * length is smaller than size, the target is not padded with further NUL
- * bytes. In case the string length is larger than size, just count-1
- * bytes are copied and the last byte is set to NUL.
- * @dst: destination address
- * @size: maximum number of bytes to copy, including the trailing NUL
- * @unsafe_ptr: unsafe address
- * Return:
- * > 0 length of the string including the trailing NUL on success
- * < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- * Get the cookie for the socket stored inside sk_buff.
- * @skb: pointer to skb
- * Return: 8 Bytes non-decreasing number on success or 0 if the socket
- * field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- * Get the owner uid of the socket stored inside sk_buff.
- * @skb: pointer to skb
- * Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- * Set full skb->hash.
- * @skb: pointer to skb
- * @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- * Calls setsockopt. Not all opts are available, only those with
- * integer optvals plus TCP_CONGESTION.
- * Supported levels: SOL_SOCKET and IPPROTO_TCP
- * @bpf_socket: pointer to bpf_socket
- * @level: SOL_SOCKET or IPPROTO_TCP
- * @optname: option name
- * @optval: pointer to option value
- * @optlen: length of optval in bytes
- * Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- * Calls getsockopt. Not all opts are available.
- * Supported levels: IPPROTO_TCP
- * @bpf_socket: pointer to bpf_socket
- * @level: IPPROTO_TCP
- * @optname: option name
- * @optval: pointer to option value
- * @optlen: length of optval in bytes
- * Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- * Set callback flags for sock_ops
- * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- * @flags: flags value
- * Return: 0 for no error
- * -EINVAL if there is no full tcp socket
- * bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- * Grow or shrink room in sk_buff.
- * @skb: pointer to skb
- * @len_diff: (signed) amount of room to grow/shrink
- * @mode: operation mode (enum bpf_adj_room_mode)
- * @flags: reserved for future use
- * Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- * Redirect skb to a sock in map using key as a lookup key for the
- * sock in map.
- * @map: pointer to sockmap
- * @key: key to lookup sock in map
- * @flags: reserved for future use
- * Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- * @skops: pointer to bpf_sock_ops
- * @map: pointer to sockmap to update
- * @key: key to insert/update sock in map
- * @flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- * Adjust the xdp_md.data_meta by delta
- * @xdp_md: pointer to xdp_md
- * @delta: An positive/negative integer to be added to xdp_md.data_meta
- * Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- * read perf event counter value and perf event enabled/running time
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * @buf: buf to fill
- * @buf_size: size of the buf
- * Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- * read perf prog attached perf event counter and enabled/running time
- * @ctx: pointer to ctx
- * @buf: buf to fill
- * @buf_size: size of the buf
- * Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- * @pt_regs: pointer to struct pt_regs
- * @rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- * Redirect msg to a sock in map using key as a lookup key for the
- * sock in map.
- * @map: pointer to sockmap
- * @key: key to lookup sock in map
- * @flags: reserved for future use
- * Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- * Bind socket to address. Only binding to IP is supported, no port can be
- * set in addr.
- * @ctx: pointer to context of type bpf_sock_addr
- * @addr: pointer to struct sockaddr to bind socket to
- * @addr_len: length of sockaddr structure
- * Return: 0 on success or negative error code
+ * Description
+ * Copy a NUL terminated string from an unsafe address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read()** helper here instead
+ * to read the string would require to estimate the length at
+ * compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a unique socket
+ * identifier per namespace.
+ * Return
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There is a single supported mode at this time:
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -821,7 +1833,9 @@ union bpf_attr {
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data), \
- FN(bind),
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -927,6 +1941,19 @@ struct bpf_tunnel_key {
__u32 tunnel_label;
};
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
/* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1017,6 +2044,7 @@ struct bpf_prog_info {
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..bcb56ee47014
--- /dev/null
+++ b/include/uapi/linux/btf.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+
+ __u32 parent_label;
+ __u32 parent_name;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 label_off; /* offset of label section */
+ __u32 object_off; /* offset of data object section*/
+ __u32 func_off; /* offset of function section */
+ __u32 type_off; /* offset of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifier */
+#define BTF_MAX_TYPE 0x7fffffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x7fffffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+/* The type id is referring to a parent BTF */
+#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
+#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
+
+/* String is in the ELF string section */
+#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
+#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
+ * bits 31: root
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT and UNION.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
+#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_MAX 11
+#define NR_BTF_KINDS 12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
+#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED 0x1
+#define BTF_INT_CHAR 0x2
+#define BTF_INT_BOOL 0x4
+#define BTF_INT_VARARGS 0x8
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ __u32 offset; /* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index 68ff25414700..db210625cee8 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -116,12 +116,16 @@ struct proc_event {
struct coredump_proc_event {
__kernel_pid_t process_pid;
__kernel_pid_t process_tgid;
+ __kernel_pid_t parent_pid;
+ __kernel_pid_t parent_tgid;
} coredump;
struct exit_proc_event {
__kernel_pid_t process_pid;
__kernel_pid_t process_tgid;
__u32 exit_code, exit_signal;
+ __kernel_pid_t parent_pid;
+ __kernel_pid_t parent_tgid;
} exit;
} event_data;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 68699f654118..b85266420bfb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -516,6 +516,7 @@ enum {
IFLA_VXLAN_COLLECT_METADATA,
IFLA_VXLAN_LABEL,
IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 103ba797a8f3..83ade9b5cf95 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -506,6 +506,8 @@
#define PCI_EXP_DEVCTL_READRQ_256B 0x1000 /* 256 Bytes */
#define PCI_EXP_DEVCTL_READRQ_512B 0x2000 /* 512 Bytes */
#define PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
+#define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
+#define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */
#define PCI_EXP_DEVSTA 10 /* Device Status */
#define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 33a70ece462f..d02e859301ff 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -276,6 +276,8 @@ enum
LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
+ LINUX_MIB_TCPDELIVERED, /* TCPDelivered */
+ LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */
__LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 560374c978f9..29eb659aa77a 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -122,6 +122,10 @@ enum {
#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */
#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */
#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */
+#define TCP_ZEROCOPY_RECEIVE 35
+#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */
+
+#define TCP_CM_INQ TCP_INQ
struct tcp_repair_opt {
__u32 opt_code;
@@ -224,6 +228,9 @@ struct tcp_info {
__u64 tcpi_busy_time; /* Time (usec) busy sending data */
__u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */
__u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
+
+ __u32 tcpi_delivered;
+ __u32 tcpi_delivered_ce;
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -244,6 +251,8 @@ enum {
TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */
TCP_NLA_CA_STATE, /* ca_state of socket */
TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
+ TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */
+ TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */
};
@@ -271,4 +280,11 @@ struct tcp_diag_md5sig {
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN];
};
+/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
+
+struct tcp_zerocopy_receive {
+ __u64 address; /* in: address of mapping */
+ __u32 length; /* in/out: number of bytes to map/mapped */
+ __u32 recv_skip_hint; /* out: amount of bytes to skip */
+};
#endif /* _UAPI_LINUX_TCP_H */
diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index bf6d28677cfe..6b2fd4d9655f 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -209,16 +209,16 @@ struct tipc_group_req {
* The string formatting for each name element is:
* media: media
* interface: media:interface name
- * link: Z.C.N:interface-Z.C.N:interface
- *
+ * link: node:interface-node:interface
*/
-
+#define TIPC_NODEID_LEN 16
#define TIPC_MAX_MEDIA_NAME 16
#define TIPC_MAX_IF_NAME 16
#define TIPC_MAX_BEARER_NAME 32
#define TIPC_MAX_LINK_NAME 68
-#define SIOCGETLINKNAME SIOCPROTOPRIVATE
+#define SIOCGETLINKNAME SIOCPROTOPRIVATE
+#define SIOCGETNODEID (SIOCPROTOPRIVATE + 1)
struct tipc_sioc_ln_req {
__u32 peer;
@@ -226,6 +226,10 @@ struct tipc_sioc_ln_req {
char linkname[TIPC_MAX_LINK_NAME];
};
+struct tipc_sioc_nodeid_req {
+ __u32 peer;
+ char node_id[TIPC_NODEID_LEN];
+};
/* The macros and functions below are deprecated:
*/
diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 3f29e3c8ed06..4b2c93b1934c 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -185,6 +185,11 @@
#define TIPC_DEF_LINK_WIN 50
#define TIPC_MAX_LINK_WIN 8191
+/*
+ * Default MTU for UDP media
+ */
+
+#define TIPC_DEF_LINK_UDP_MTU 14000
struct tipc_node_info {
__be32 addr; /* network address of node */
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 0affb682e5e3..85c11982c89b 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -266,6 +266,7 @@ enum {
TIPC_NLA_PROP_PRIO, /* u32 */
TIPC_NLA_PROP_TOL, /* u32 */
TIPC_NLA_PROP_WIN, /* u32 */
+ TIPC_NLA_PROP_MTU, /* u32 */
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index efb7b5991c2f..09d00f8c442b 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -32,6 +32,7 @@ struct udphdr {
#define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */
#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */
#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */
+#define UDP_SEGMENT 103 /* Set GSO segmentation size */
/* UDP encapsulation types */
#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index a713fd23ec88..35c485fa9ea3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -4,6 +4,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 027107f4be53..0fd8d8f1a398 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -11,11 +11,13 @@
* General Public License for more details.
*/
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
+#include <uapi/linux/btf.h>
#include "map_in_map.h"
@@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map)
bpf_map_area_free(array);
}
+static void array_map_seq_show_elem(struct bpf_map *map, void *key,
+ struct seq_file *m)
+{
+ void *value;
+
+ rcu_read_lock();
+
+ value = array_map_lookup_elem(map, key);
+ if (!value) {
+ rcu_read_unlock();
+ return;
+ }
+
+ seq_printf(m, "%u: ", *(u32 *)key);
+ btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+ seq_puts(m, "\n");
+
+ rcu_read_unlock();
+}
+
+static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+ u32 btf_key_id, u32 btf_value_id)
+{
+ const struct btf_type *key_type, *value_type;
+ u32 key_size, value_size;
+ u32 int_data;
+
+ key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+ if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+ return -EINVAL;
+
+ int_data = *(u32 *)(key_type + 1);
+ /* bpf array can only take a u32 key. This check makes
+ * sure that the btf matches the attr used during map_create.
+ */
+ if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
+ BTF_INT_OFFSET(int_data))
+ return -EINVAL;
+
+ value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+ if (!value_type || value_size > map->value_size)
+ return -EINVAL;
+
+ return 0;
+}
+
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
@@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = {
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
+ .map_seq_show_elem = array_map_seq_show_elem,
+ .map_check_btf = array_map_check_btf,
};
const struct bpf_map_ops percpu_array_map_ops = {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
new file mode 100644
index 000000000000..22e1046a1a86
--- /dev/null
+++ b/kernel/bpf/btf.c
@@ -0,0 +1,2064 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <uapi/linux/btf.h>
+#include <uapi/linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+
+/* BTF (BPF Type Format) is the meta data format which describes
+ * the data types of BPF program/map. Hence, it basically focus
+ * on the C programming language which the modern BPF is primary
+ * using.
+ *
+ * ELF Section:
+ * ~~~~~~~~~~~
+ * The BTF data is stored under the ".BTF" ELF section
+ *
+ * struct btf_type:
+ * ~~~~~~~~~~~~~~~
+ * Each 'struct btf_type' object describes a C data type.
+ * Depending on the type it is describing, a 'struct btf_type'
+ * object may be followed by more data. F.e.
+ * To describe an array, 'struct btf_type' is followed by
+ * 'struct btf_array'.
+ *
+ * 'struct btf_type' and any extra data following it are
+ * 4 bytes aligned.
+ *
+ * Type section:
+ * ~~~~~~~~~~~~~
+ * The BTF type section contains a list of 'struct btf_type' objects.
+ * Each one describes a C type. Recall from the above section
+ * that a 'struct btf_type' object could be immediately followed by extra
+ * data in order to desribe some particular C types.
+ *
+ * type_id:
+ * ~~~~~~~
+ * Each btf_type object is identified by a type_id. The type_id
+ * is implicitly implied by the location of the btf_type object in
+ * the BTF type section. The first one has type_id 1. The second
+ * one has type_id 2...etc. Hence, an earlier btf_type has
+ * a smaller type_id.
+ *
+ * A btf_type object may refer to another btf_type object by using
+ * type_id (i.e. the "type" in the "struct btf_type").
+ *
+ * NOTE that we cannot assume any reference-order.
+ * A btf_type object can refer to an earlier btf_type object
+ * but it can also refer to a later btf_type object.
+ *
+ * For example, to describe "const void *". A btf_type
+ * object describing "const" may refer to another btf_type
+ * object describing "void *". This type-reference is done
+ * by specifying type_id:
+ *
+ * [1] CONST (anon) type_id=2
+ * [2] PTR (anon) type_id=0
+ *
+ * The above is the btf_verifier debug log:
+ * - Each line started with "[?]" is a btf_type object
+ * - [?] is the type_id of the btf_type object.
+ * - CONST/PTR is the BTF_KIND_XXX
+ * - "(anon)" is the name of the type. It just
+ * happens that CONST and PTR has no name.
+ * - type_id=XXX is the 'u32 type' in btf_type
+ *
+ * NOTE: "void" has type_id 0
+ *
+ * String section:
+ * ~~~~~~~~~~~~~~
+ * The BTF string section contains the names used by the type section.
+ * Each string is referred by an "offset" from the beginning of the
+ * string section.
+ *
+ * Each string is '\0' terminated.
+ *
+ * The first character in the string section must be '\0'
+ * which is used to mean 'anonymous'. Some btf_type may not
+ * have a name.
+ */
+
+/* BTF verification:
+ *
+ * To verify BTF data, two passes are needed.
+ *
+ * Pass #1
+ * ~~~~~~~
+ * The first pass is to collect all btf_type objects to
+ * an array: "btf->types".
+ *
+ * Depending on the C type that a btf_type is describing,
+ * a btf_type may be followed by extra data. We don't know
+ * how many btf_type is there, and more importantly we don't
+ * know where each btf_type is located in the type section.
+ *
+ * Without knowing the location of each type_id, most verifications
+ * cannot be done. e.g. an earlier btf_type may refer to a later
+ * btf_type (recall the "const void *" above), so we cannot
+ * check this type-reference in the first pass.
+ *
+ * In the first pass, it still does some verifications (e.g.
+ * checking the name is a valid offset to the string section).
+ *
+ * Pass #2
+ * ~~~~~~~
+ * The main focus is to resolve a btf_type that is referring
+ * to another type.
+ *
+ * We have to ensure the referring type:
+ * 1) does exist in the BTF (i.e. in btf->types[])
+ * 2) does not cause a loop:
+ * struct A {
+ * struct B b;
+ * };
+ *
+ * struct B {
+ * struct A a;
+ * };
+ *
+ * btf_type_needs_resolve() decides if a btf_type needs
+ * to be resolved.
+ *
+ * The needs_resolve type implements the "resolve()" ops which
+ * essentially does a DFS and detects backedge.
+ *
+ * During resolve (or DFS), different C types have different
+ * "RESOLVED" conditions.
+ *
+ * When resolving a BTF_KIND_STRUCT, we need to resolve all its
+ * members because a member is always referring to another
+ * type. A struct's member can be treated as "RESOLVED" if
+ * it is referring to a BTF_KIND_PTR. Otherwise, the
+ * following valid C struct would be rejected:
+ *
+ * struct A {
+ * int m;
+ * struct A *a;
+ * };
+ *
+ * When resolving a BTF_KIND_PTR, it needs to keep resolving if
+ * it is referring to another BTF_KIND_PTR. Otherwise, we cannot
+ * detect a pointer loop, e.g.:
+ * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
+ * ^ |
+ * +-----------------------------------------+
+ *
+ */
+
+#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE)
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
+
+/* 16MB for 64k structs and each has 16 members and
+ * a few MB spaces for the string section.
+ * The hard limit is S32_MAX.
+ */
+#define BTF_MAX_SIZE (16 * 1024 * 1024)
+/* 64k. We can raise it later. The hard limit is S32_MAX. */
+#define BTF_MAX_NR_TYPES 65535
+
+#define for_each_member(i, struct_type, member) \
+ for (i = 0, member = btf_type_member(struct_type); \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+#define for_each_member_from(i, from, struct_type, member) \
+ for (i = from, member = btf_type_member(struct_type) + from; \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+struct btf {
+ union {
+ struct btf_header *hdr;
+ void *data;
+ };
+ struct btf_type **types;
+ u32 *resolved_ids;
+ u32 *resolved_sizes;
+ const char *strings;
+ void *nohdr_data;
+ u32 nr_types;
+ u32 types_size;
+ u32 data_size;
+ refcount_t refcnt;
+};
+
+enum verifier_phase {
+ CHECK_META,
+ CHECK_TYPE,
+};
+
+struct resolve_vertex {
+ const struct btf_type *t;
+ u32 type_id;
+ u16 next_member;
+};
+
+enum visit_state {
+ NOT_VISITED,
+ VISITED,
+ RESOLVED,
+};
+
+enum resolve_mode {
+ RESOLVE_TBD, /* To Be Determined */
+ RESOLVE_PTR, /* Resolving for Pointer */
+ RESOLVE_STRUCT_OR_ARRAY, /* Resolving for struct/union
+ * or array
+ */
+};
+
+#define MAX_RESOLVE_DEPTH 32
+
+struct btf_verifier_env {
+ struct btf *btf;
+ u8 *visit_states;
+ struct resolve_vertex stack[MAX_RESOLVE_DEPTH];
+ struct bpf_verifier_log log;
+ u32 log_type_id;
+ u32 top_stack;
+ enum verifier_phase phase;
+ enum resolve_mode resolve_mode;
+};
+
+static const char * const btf_kind_str[NR_BTF_KINDS] = {
+ [BTF_KIND_UNKN] = "UNKNOWN",
+ [BTF_KIND_INT] = "INT",
+ [BTF_KIND_PTR] = "PTR",
+ [BTF_KIND_ARRAY] = "ARRAY",
+ [BTF_KIND_STRUCT] = "STRUCT",
+ [BTF_KIND_UNION] = "UNION",
+ [BTF_KIND_ENUM] = "ENUM",
+ [BTF_KIND_FWD] = "FWD",
+ [BTF_KIND_TYPEDEF] = "TYPEDEF",
+ [BTF_KIND_VOLATILE] = "VOLATILE",
+ [BTF_KIND_CONST] = "CONST",
+ [BTF_KIND_RESTRICT] = "RESTRICT",
+};
+
+struct btf_kind_operations {
+ s32 (*check_meta)(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left);
+ int (*resolve)(struct btf_verifier_env *env,
+ const struct resolve_vertex *v);
+ int (*check_member)(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type);
+ void (*log_details)(struct btf_verifier_env *env,
+ const struct btf_type *t);
+ void (*seq_show)(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offsets,
+ struct seq_file *m);
+};
+
+static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
+static struct btf_type btf_void;
+
+static bool btf_type_is_modifier(const struct btf_type *t)
+{
+ /* Some of them is not strictly a C modifier
+ * but they are grouped into the same bucket
+ * for BTF concern:
+ * A type (t) that refers to another
+ * type through t->type AND its size cannot
+ * be determined without following the t->type.
+ *
+ * ptr does not fall into this bucket
+ * because its size is always sizeof(void *).
+ */
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ return true;
+ }
+
+ return false;
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+ /* void => no type and size info.
+ * Hence, FWD is also treated as void.
+ */
+ return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+ return !t || btf_type_is_void(t);
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static bool btf_type_is_struct(const struct btf_type *t)
+{
+ u8 kind = BTF_INFO_KIND(t->info);
+
+ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static bool btf_type_is_array(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
+}
+
+static bool btf_type_is_ptr(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
+}
+
+static bool btf_type_is_int(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
+}
+
+/* What types need to be resolved?
+ *
+ * btf_type_is_modifier() is an obvious one.
+ *
+ * btf_type_is_struct() because its member refers to
+ * another type (through member->type).
+
+ * btf_type_is_array() because its element (array->type)
+ * refers to another type. Array can be thought of a
+ * special case of struct while array just has the same
+ * member-type repeated by array->nelems of times.
+ */
+static bool btf_type_needs_resolve(const struct btf_type *t)
+{
+ return btf_type_is_modifier(t) ||
+ btf_type_is_ptr(t) ||
+ btf_type_is_struct(t) ||
+ btf_type_is_array(t);
+}
+
+/* t->size can be used */
+static bool btf_type_has_size(const struct btf_type *t)
+{
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ return true;
+ }
+
+ return false;
+}
+
+static const char *btf_int_encoding_str(u8 encoding)
+{
+ if (encoding == 0)
+ return "(none)";
+ else if (encoding == BTF_INT_SIGNED)
+ return "SIGNED";
+ else if (encoding == BTF_INT_CHAR)
+ return "CHAR";
+ else if (encoding == BTF_INT_BOOL)
+ return "BOOL";
+ else if (encoding == BTF_INT_VARARGS)
+ return "VARARGS";
+ else
+ return "UNKN";
+}
+
+static u16 btf_type_vlen(const struct btf_type *t)
+{
+ return BTF_INFO_VLEN(t->info);
+}
+
+static u32 btf_type_int(const struct btf_type *t)
+{
+ return *(u32 *)(t + 1);
+}
+
+static const struct btf_array *btf_type_array(const struct btf_type *t)
+{
+ return (const struct btf_array *)(t + 1);
+}
+
+static const struct btf_member *btf_type_member(const struct btf_type *t)
+{
+ return (const struct btf_member *)(t + 1);
+}
+
+static const struct btf_enum *btf_type_enum(const struct btf_type *t)
+{
+ return (const struct btf_enum *)(t + 1);
+}
+
+static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
+{
+ return kind_ops[BTF_INFO_KIND(t->info)];
+}
+
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+{
+ return !BTF_STR_TBL_ELF_ID(offset) &&
+ BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+}
+
+static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
+{
+ if (!BTF_STR_OFFSET(offset))
+ return "(anon)";
+ else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
+ return &btf->strings[BTF_STR_OFFSET(offset)];
+ else
+ return "(invalid-name-offset)";
+}
+
+static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
+{
+ if (type_id > btf->nr_types)
+ return NULL;
+
+ return btf->types[type_id];
+}
+
+__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+}
+
+__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env,
+ const char *fmt, ...)
+{
+ struct bpf_verifier_log *log = &env->log;
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+}
+
+__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ bool log_details,
+ const char *fmt, ...)
+{
+ struct bpf_verifier_log *log = &env->log;
+ u8 kind = BTF_INFO_KIND(t->info);
+ struct btf *btf = env->btf;
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ __btf_verifier_log(log, "[%u] %s %s%s",
+ env->log_type_id,
+ btf_kind_str[kind],
+ btf_name_by_offset(btf, t->name_off),
+ log_details ? " " : "");
+
+ if (log_details)
+ btf_type_ops(t)->log_details(env, t);
+
+ if (fmt && *fmt) {
+ __btf_verifier_log(log, " ");
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+ }
+
+ __btf_verifier_log(log, "\n");
+}
+
+#define btf_verifier_log_type(env, t, ...) \
+ __btf_verifier_log_type((env), (t), true, __VA_ARGS__)
+#define btf_verifier_log_basic(env, t, ...) \
+ __btf_verifier_log_type((env), (t), false, __VA_ARGS__)
+
+__printf(4, 5)
+static void btf_verifier_log_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const char *fmt, ...)
+{
+ struct bpf_verifier_log *log = &env->log;
+ struct btf *btf = env->btf;
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ /* The CHECK_META phase already did a btf dump.
+ *
+ * If member is logged again, it must hit an error in
+ * parsing this member. It is useful to print out which
+ * struct this member belongs to.
+ */
+ if (env->phase != CHECK_META)
+ btf_verifier_log_type(env, struct_type, NULL);
+
+ __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u",
+ btf_name_by_offset(btf, member->name_off),
+ member->type, member->offset);
+
+ if (fmt && *fmt) {
+ __btf_verifier_log(log, " ");
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+ }
+
+ __btf_verifier_log(log, "\n");
+}
+
+static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+{
+ struct bpf_verifier_log *log = &env->log;
+ const struct btf *btf = env->btf;
+ const struct btf_header *hdr;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ hdr = btf->hdr;
+ __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
+ __btf_verifier_log(log, "version: %u\n", hdr->version);
+ __btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
+ __btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
+ __btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
+ __btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
+ __btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
+ __btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+ __btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+ __btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
+ __btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
+ __btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+}
+
+static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
+{
+ struct btf *btf = env->btf;
+
+ /* < 2 because +1 for btf_void which is always in btf->types[0].
+ * btf_void is not accounted in btf->nr_types because btf_void
+ * does not come from the BTF file.
+ */
+ if (btf->types_size - btf->nr_types < 2) {
+ /* Expand 'types' array */
+
+ struct btf_type **new_types;
+ u32 expand_by, new_size;
+
+ if (btf->types_size == BTF_MAX_NR_TYPES) {
+ btf_verifier_log(env, "Exceeded max num of types");
+ return -E2BIG;
+ }
+
+ expand_by = max_t(u32, btf->types_size >> 2, 16);
+ new_size = min_t(u32, BTF_MAX_NR_TYPES,
+ btf->types_size + expand_by);
+
+ new_types = kvzalloc(new_size * sizeof(*new_types),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_types)
+ return -ENOMEM;
+
+ if (btf->nr_types == 0)
+ new_types[0] = &btf_void;
+ else
+ memcpy(new_types, btf->types,
+ sizeof(*btf->types) * (btf->nr_types + 1));
+
+ kvfree(btf->types);
+ btf->types = new_types;
+ btf->types_size = new_size;
+ }
+
+ btf->types[++(btf->nr_types)] = t;
+
+ return 0;
+}
+
+static void btf_free(struct btf *btf)
+{
+ kvfree(btf->types);
+ kvfree(btf->resolved_sizes);
+ kvfree(btf->resolved_ids);
+ kvfree(btf->data);
+ kfree(btf);
+}
+
+static void btf_get(struct btf *btf)
+{
+ refcount_inc(&btf->refcnt);
+}
+
+void btf_put(struct btf *btf)
+{
+ if (btf && refcount_dec_and_test(&btf->refcnt))
+ btf_free(btf);
+}
+
+static int env_resolve_init(struct btf_verifier_env *env)
+{
+ struct btf *btf = env->btf;
+ u32 nr_types = btf->nr_types;
+ u32 *resolved_sizes = NULL;
+ u32 *resolved_ids = NULL;
+ u8 *visit_states = NULL;
+
+ /* +1 for btf_void */
+ resolved_sizes = kvzalloc((nr_types + 1) * sizeof(*resolved_sizes),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!resolved_sizes)
+ goto nomem;
+
+ resolved_ids = kvzalloc((nr_types + 1) * sizeof(*resolved_ids),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!resolved_ids)
+ goto nomem;
+
+ visit_states = kvzalloc((nr_types + 1) * sizeof(*visit_states),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!visit_states)
+ goto nomem;
+
+ btf->resolved_sizes = resolved_sizes;
+ btf->resolved_ids = resolved_ids;
+ env->visit_states = visit_states;
+
+ return 0;
+
+nomem:
+ kvfree(resolved_sizes);
+ kvfree(resolved_ids);
+ kvfree(visit_states);
+ return -ENOMEM;
+}
+
+static void btf_verifier_env_free(struct btf_verifier_env *env)
+{
+ kvfree(env->visit_states);
+ kfree(env);
+}
+
+static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
+ const struct btf_type *next_type)
+{
+ switch (env->resolve_mode) {
+ case RESOLVE_TBD:
+ /* int, enum or void is a sink */
+ return !btf_type_needs_resolve(next_type);
+ case RESOLVE_PTR:
+ /* int, enum, void, struct or array is a sink for ptr */
+ return !btf_type_is_modifier(next_type) &&
+ !btf_type_is_ptr(next_type);
+ case RESOLVE_STRUCT_OR_ARRAY:
+ /* int, enum, void or ptr is a sink for struct and array */
+ return !btf_type_is_modifier(next_type) &&
+ !btf_type_is_array(next_type) &&
+ !btf_type_is_struct(next_type);
+ default:
+ BUG_ON(1);
+ }
+}
+
+static bool env_type_is_resolved(const struct btf_verifier_env *env,
+ u32 type_id)
+{
+ return env->visit_states[type_id] == RESOLVED;
+}
+
+static int env_stack_push(struct btf_verifier_env *env,
+ const struct btf_type *t, u32 type_id)
+{
+ struct resolve_vertex *v;
+
+ if (env->top_stack == MAX_RESOLVE_DEPTH)
+ return -E2BIG;
+
+ if (env->visit_states[type_id] != NOT_VISITED)
+ return -EEXIST;
+
+ env->visit_states[type_id] = VISITED;
+
+ v = &env->stack[env->top_stack++];
+ v->t = t;
+ v->type_id = type_id;
+ v->next_member = 0;
+
+ if (env->resolve_mode == RESOLVE_TBD) {
+ if (btf_type_is_ptr(t))
+ env->resolve_mode = RESOLVE_PTR;
+ else if (btf_type_is_struct(t) || btf_type_is_array(t))
+ env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY;
+ }
+
+ return 0;
+}
+
+static void env_stack_set_next_member(struct btf_verifier_env *env,
+ u16 next_member)
+{
+ env->stack[env->top_stack - 1].next_member = next_member;
+}
+
+static void env_stack_pop_resolved(struct btf_verifier_env *env,
+ u32 resolved_type_id,
+ u32 resolved_size)
+{
+ u32 type_id = env->stack[--(env->top_stack)].type_id;
+ struct btf *btf = env->btf;
+
+ btf->resolved_sizes[type_id] = resolved_size;
+ btf->resolved_ids[type_id] = resolved_type_id;
+ env->visit_states[type_id] = RESOLVED;
+}
+
+static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
+{
+ return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
+}
+
+/* The input param "type_id" must point to a needs_resolve type */
+static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
+ u32 *type_id)
+{
+ *type_id = btf->resolved_ids[*type_id];
+ return btf_type_by_id(btf, *type_id);
+}
+
+const struct btf_type *btf_type_id_size(const struct btf *btf,
+ u32 *type_id, u32 *ret_size)
+{
+ const struct btf_type *size_type;
+ u32 size_type_id = *type_id;
+ u32 size = 0;
+
+ size_type = btf_type_by_id(btf, size_type_id);
+ if (btf_type_is_void_or_null(size_type))
+ return NULL;
+
+ if (btf_type_has_size(size_type)) {
+ size = size_type->size;
+ } else if (btf_type_is_array(size_type)) {
+ size = btf->resolved_sizes[size_type_id];
+ } else if (btf_type_is_ptr(size_type)) {
+ size = sizeof(void *);
+ } else {
+ if (WARN_ON_ONCE(!btf_type_is_modifier(size_type)))
+ return NULL;
+
+ size = btf->resolved_sizes[size_type_id];
+ size_type_id = btf->resolved_ids[size_type_id];
+ size_type = btf_type_by_id(btf, size_type_id);
+ if (btf_type_is_void(size_type))
+ return NULL;
+ }
+
+ *type_id = size_type_id;
+ if (ret_size)
+ *ret_size = size;
+
+ return size_type;
+}
+
+static int btf_df_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ btf_verifier_log_basic(env, struct_type,
+ "Unsupported check_member");
+ return -EINVAL;
+}
+
+static int btf_df_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ btf_verifier_log_basic(env, v->t, "Unsupported resolve");
+ return -EINVAL;
+}
+
+static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offsets,
+ struct seq_file *m)
+{
+ seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info));
+}
+
+static int btf_int_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ u32 int_data = btf_type_int(member_type);
+ u32 struct_bits_off = member->offset;
+ u32 struct_size = struct_type->size;
+ u32 nr_copy_bits;
+ u32 bytes_offset;
+
+ if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "bits_offset exceeds U32_MAX");
+ return -EINVAL;
+ }
+
+ struct_bits_off += BTF_INT_OFFSET(int_data);
+ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+ nr_copy_bits = BTF_INT_BITS(int_data) +
+ BITS_PER_BYTE_MASKED(struct_bits_off);
+
+ if (nr_copy_bits > BITS_PER_U64) {
+ btf_verifier_log_member(env, struct_type, member,
+ "nr_copy_bits exceeds 64");
+ return -EINVAL;
+ }
+
+ if (struct_size < bytes_offset ||
+ struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member exceeds struct_size");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static s32 btf_int_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ u32 int_data, nr_bits, meta_needed = sizeof(int_data);
+ u16 encoding;
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ int_data = btf_type_int(t);
+ nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
+
+ if (nr_bits > BITS_PER_U64) {
+ btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
+ BITS_PER_U64);
+ return -EINVAL;
+ }
+
+ if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) {
+ btf_verifier_log_type(env, t, "nr_bits exceeds type_size");
+ return -EINVAL;
+ }
+
+ encoding = BTF_INT_ENCODING(int_data);
+ if (encoding &&
+ encoding != BTF_INT_SIGNED &&
+ encoding != BTF_INT_CHAR &&
+ encoding != BTF_INT_BOOL &&
+ encoding != BTF_INT_VARARGS) {
+ btf_verifier_log_type(env, t, "Unsupported encoding");
+ return -ENOTSUPP;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return meta_needed;
+}
+
+static void btf_int_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ int int_data = btf_type_int(t);
+
+ btf_verifier_log(env,
+ "size=%u bits_offset=%u nr_bits=%u encoding=%s",
+ t->size, BTF_INT_OFFSET(int_data),
+ BTF_INT_BITS(int_data),
+ btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
+}
+
+static void btf_int_bits_seq_show(const struct btf *btf,
+ const struct btf_type *t,
+ void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ u32 int_data = btf_type_int(t);
+ u16 nr_bits = BTF_INT_BITS(int_data);
+ u16 total_bits_offset;
+ u16 nr_copy_bytes;
+ u16 nr_copy_bits;
+ u8 nr_upper_bits;
+ union {
+ u64 u64_num;
+ u8 u8_nums[8];
+ } print_num;
+
+ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
+ data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+ bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+ nr_copy_bits = nr_bits + bits_offset;
+ nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
+
+ print_num.u64_num = 0;
+ memcpy(&print_num.u64_num, data, nr_copy_bytes);
+
+ /* Ditch the higher order bits */
+ nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits);
+ if (nr_upper_bits) {
+ /* We need to mask out some bits of the upper byte. */
+ u8 mask = (1 << nr_upper_bits) - 1;
+
+ print_num.u8_nums[nr_copy_bytes - 1] &= mask;
+ }
+
+ print_num.u64_num >>= bits_offset;
+
+ seq_printf(m, "0x%llx", print_num.u64_num);
+}
+
+static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ u32 int_data = btf_type_int(t);
+ u8 encoding = BTF_INT_ENCODING(int_data);
+ bool sign = encoding & BTF_INT_SIGNED;
+ u32 nr_bits = BTF_INT_BITS(int_data);
+
+ if (bits_offset || BTF_INT_OFFSET(int_data) ||
+ BITS_PER_BYTE_MASKED(nr_bits)) {
+ btf_int_bits_seq_show(btf, t, data, bits_offset, m);
+ return;
+ }
+
+ switch (nr_bits) {
+ case 64:
+ if (sign)
+ seq_printf(m, "%lld", *(s64 *)data);
+ else
+ seq_printf(m, "%llu", *(u64 *)data);
+ break;
+ case 32:
+ if (sign)
+ seq_printf(m, "%d", *(s32 *)data);
+ else
+ seq_printf(m, "%u", *(u32 *)data);
+ break;
+ case 16:
+ if (sign)
+ seq_printf(m, "%d", *(s16 *)data);
+ else
+ seq_printf(m, "%u", *(u16 *)data);
+ break;
+ case 8:
+ if (sign)
+ seq_printf(m, "%d", *(s8 *)data);
+ else
+ seq_printf(m, "%u", *(u8 *)data);
+ break;
+ default:
+ btf_int_bits_seq_show(btf, t, data, bits_offset, m);
+ }
+}
+
+static const struct btf_kind_operations int_ops = {
+ .check_meta = btf_int_check_meta,
+ .resolve = btf_df_resolve,
+ .check_member = btf_int_check_member,
+ .log_details = btf_int_log,
+ .seq_show = btf_int_seq_show,
+};
+
+static int btf_modifier_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ const struct btf_type *resolved_type;
+ u32 resolved_type_id = member->type;
+ struct btf_member resolved_member;
+ struct btf *btf = env->btf;
+
+ resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL);
+ if (!resolved_type) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Invalid member");
+ return -EINVAL;
+ }
+
+ resolved_member = *member;
+ resolved_member.type = resolved_type_id;
+
+ return btf_type_ops(resolved_type)->check_member(env, struct_type,
+ &resolved_member,
+ resolved_type);
+}
+
+static int btf_ptr_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ u32 struct_size, struct_bits_off, bytes_offset;
+
+ struct_size = struct_type->size;
+ struct_bits_off = member->offset;
+ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+
+ if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member is not byte aligned");
+ return -EINVAL;
+ }
+
+ if (struct_size - bytes_offset < sizeof(void *)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member exceeds struct_size");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_ref_type_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ if (BTF_TYPE_PARENT(t->type)) {
+ btf_verifier_log_type(env, t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return 0;
+}
+
+static int btf_modifier_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *t = v->t;
+ const struct btf_type *next_type;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ u32 next_type_size = 0;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ /* "typedef void new_void", "const void"...etc */
+ if (btf_type_is_void(next_type))
+ goto resolved;
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ /* Figure out the resolved next_type_id with size.
+ * They will be stored in the current modifier's
+ * resolved_ids and resolved_sizes such that it can
+ * save us a few type-following when we use it later (e.g. in
+ * pretty print).
+ */
+ if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
+ !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+resolved:
+ env_stack_pop_resolved(env, next_type_id, next_type_size);
+
+ return 0;
+}
+
+static int btf_ptr_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *next_type;
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ u32 next_type_size = 0;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ /* "void *" */
+ if (btf_type_is_void(next_type))
+ goto resolved;
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ /* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY,
+ * the modifier may have stopped resolving when it was resolved
+ * to a ptr (last-resolved-ptr).
+ *
+ * We now need to continue from the last-resolved-ptr to
+ * ensure the last-resolved-ptr will not referring back to
+ * the currenct ptr (t).
+ */
+ if (btf_type_is_modifier(next_type)) {
+ const struct btf_type *resolved_type;
+ u32 resolved_type_id;
+
+ resolved_type_id = next_type_id;
+ resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
+
+ if (btf_type_is_ptr(resolved_type) &&
+ !env_type_is_resolve_sink(env, resolved_type) &&
+ !env_type_is_resolved(env, resolved_type_id))
+ return env_stack_push(env, resolved_type,
+ resolved_type_id);
+ }
+
+ if (!btf_type_id_size(btf, &next_type_id, &next_type_size) &&
+ !btf_type_is_void(btf_type_id_resolve(btf, &next_type_id))) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+resolved:
+ env_stack_pop_resolved(env, next_type_id, 0);
+
+ return 0;
+}
+
+static void btf_modifier_seq_show(const struct btf *btf,
+ const struct btf_type *t,
+ u32 type_id, void *data,
+ u8 bits_offset, struct seq_file *m)
+{
+ t = btf_type_id_resolve(btf, &type_id);
+
+ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+}
+
+static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ /* It is a hashed value */
+ seq_printf(m, "%p", *(void **)data);
+}
+
+static void btf_ref_type_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ btf_verifier_log(env, "type_id=%u", t->type);
+}
+
+static struct btf_kind_operations modifier_ops = {
+ .check_meta = btf_ref_type_check_meta,
+ .resolve = btf_modifier_resolve,
+ .check_member = btf_modifier_check_member,
+ .log_details = btf_ref_type_log,
+ .seq_show = btf_modifier_seq_show,
+};
+
+static struct btf_kind_operations ptr_ops = {
+ .check_meta = btf_ref_type_check_meta,
+ .resolve = btf_ptr_resolve,
+ .check_member = btf_ptr_check_member,
+ .log_details = btf_ref_type_log,
+ .seq_show = btf_ptr_seq_show,
+};
+
+static struct btf_kind_operations fwd_ops = {
+ .check_meta = btf_ref_type_check_meta,
+ .resolve = btf_df_resolve,
+ .check_member = btf_df_check_member,
+ .log_details = btf_ref_type_log,
+ .seq_show = btf_df_seq_show,
+};
+
+static int btf_array_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ u32 struct_bits_off = member->offset;
+ u32 struct_size, bytes_offset;
+ u32 array_type_id, array_size;
+ struct btf *btf = env->btf;
+
+ if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member is not byte aligned");
+ return -EINVAL;
+ }
+
+ array_type_id = member->type;
+ btf_type_id_size(btf, &array_type_id, &array_size);
+ struct_size = struct_type->size;
+ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+ if (struct_size - bytes_offset < array_size) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member exceeds struct_size");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static s32 btf_array_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_array *array = btf_type_array(t);
+ u32 meta_needed = sizeof(*array);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ /* We are a little forgiving on array->index_type since
+ * the kernel is not using it.
+ */
+ /* Array elem cannot be in type void,
+ * so !array->type is not allowed.
+ */
+ if (!array->type || BTF_TYPE_PARENT(array->type)) {
+ btf_verifier_log_type(env, t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return meta_needed;
+}
+
+static int btf_array_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_array *array = btf_type_array(v->t);
+ const struct btf_type *elem_type;
+ u32 elem_type_id = array->type;
+ struct btf *btf = env->btf;
+ u32 elem_size;
+
+ elem_type = btf_type_by_id(btf, elem_type_id);
+ if (btf_type_is_void_or_null(elem_type)) {
+ btf_verifier_log_type(env, v->t,
+ "Invalid elem");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, elem_type) &&
+ !env_type_is_resolved(env, elem_type_id))
+ return env_stack_push(env, elem_type, elem_type_id);
+
+ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+ if (!elem_type) {
+ btf_verifier_log_type(env, v->t, "Invalid elem");
+ return -EINVAL;
+ }
+
+ if (btf_type_is_int(elem_type)) {
+ int int_type_data = btf_type_int(elem_type);
+ u16 nr_bits = BTF_INT_BITS(int_type_data);
+ u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+
+ /* Put more restriction on array of int. The int cannot
+ * be a bit field and it must be either u8/u16/u32/u64.
+ */
+ if (BITS_PER_BYTE_MASKED(nr_bits) ||
+ BTF_INT_OFFSET(int_type_data) ||
+ (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
+ nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+ btf_verifier_log_type(env, v->t,
+ "Invalid array of int");
+ return -EINVAL;
+ }
+ }
+
+ if (array->nelems && elem_size > U32_MAX / array->nelems) {
+ btf_verifier_log_type(env, v->t,
+ "Array size overflows U32_MAX");
+ return -EINVAL;
+ }
+
+ env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems);
+
+ return 0;
+}
+
+static void btf_array_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ const struct btf_array *array = btf_type_array(t);
+
+ btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u",
+ array->type, array->index_type, array->nelems);
+}
+
+static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ const struct btf_array *array = btf_type_array(t);
+ const struct btf_kind_operations *elem_ops;
+ const struct btf_type *elem_type;
+ u32 i, elem_size, elem_type_id;
+
+ elem_type_id = array->type;
+ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+ elem_ops = btf_type_ops(elem_type);
+ seq_puts(m, "[");
+ for (i = 0; i < array->nelems; i++) {
+ if (i)
+ seq_puts(m, ",");
+
+ elem_ops->seq_show(btf, elem_type, elem_type_id, data,
+ bits_offset, m);
+ data += elem_size;
+ }
+ seq_puts(m, "]");
+}
+
+static struct btf_kind_operations array_ops = {
+ .check_meta = btf_array_check_meta,
+ .resolve = btf_array_resolve,
+ .check_member = btf_array_check_member,
+ .log_details = btf_array_log,
+ .seq_show = btf_array_seq_show,
+};
+
+static int btf_struct_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ u32 struct_bits_off = member->offset;
+ u32 struct_size, bytes_offset;
+
+ if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member is not byte aligned");
+ return -EINVAL;
+ }
+
+ struct_size = struct_type->size;
+ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+ if (struct_size - bytes_offset < member_type->size) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member exceeds struct_size");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static s32 btf_struct_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION;
+ const struct btf_member *member;
+ struct btf *btf = env->btf;
+ u32 struct_size = t->size;
+ u32 meta_needed;
+ u16 i;
+
+ meta_needed = btf_type_vlen(t) * sizeof(*member);
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ for_each_member(i, t, member) {
+ if (!btf_name_offset_valid(btf, member->name_off)) {
+ btf_verifier_log_member(env, t, member,
+ "Invalid member name_offset:%u",
+ member->name_off);
+ return -EINVAL;
+ }
+
+ /* A member cannot be in type void */
+ if (!member->type || BTF_TYPE_PARENT(member->type)) {
+ btf_verifier_log_member(env, t, member,
+ "Invalid type_id");
+ return -EINVAL;
+ }
+
+ if (is_union && member->offset) {
+ btf_verifier_log_member(env, t, member,
+ "Invalid member bits_offset");
+ return -EINVAL;
+ }
+
+ if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) {
+ btf_verifier_log_member(env, t, member,
+ "Memmber bits_offset exceeds its struct size");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_member(env, t, member, NULL);
+ }
+
+ return meta_needed;
+}
+
+static int btf_struct_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_member *member;
+ int err;
+ u16 i;
+
+ /* Before continue resolving the next_member,
+ * ensure the last member is indeed resolved to a
+ * type with size info.
+ */
+ if (v->next_member) {
+ const struct btf_type *last_member_type;
+ const struct btf_member *last_member;
+ u16 last_member_type_id;
+
+ last_member = btf_type_member(v->t) + v->next_member - 1;
+ last_member_type_id = last_member->type;
+ if (WARN_ON_ONCE(!env_type_is_resolved(env,
+ last_member_type_id)))
+ return -EINVAL;
+
+ last_member_type = btf_type_by_id(env->btf,
+ last_member_type_id);
+ err = btf_type_ops(last_member_type)->check_member(env, v->t,
+ last_member,
+ last_member_type);
+ if (err)
+ return err;
+ }
+
+ for_each_member_from(i, v->next_member, v->t, member) {
+ u32 member_type_id = member->type;
+ const struct btf_type *member_type = btf_type_by_id(env->btf,
+ member_type_id);
+
+ if (btf_type_is_void_or_null(member_type)) {
+ btf_verifier_log_member(env, v->t, member,
+ "Invalid member");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, member_type) &&
+ !env_type_is_resolved(env, member_type_id)) {
+ env_stack_set_next_member(env, i + 1);
+ return env_stack_push(env, member_type, member_type_id);
+ }
+
+ err = btf_type_ops(member_type)->check_member(env, v->t,
+ member,
+ member_type);
+ if (err)
+ return err;
+ }
+
+ env_stack_pop_resolved(env, 0, 0);
+
+ return 0;
+}
+
+static void btf_struct_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? "|" : ",";
+ const struct btf_member *member;
+ u32 i;
+
+ seq_puts(m, "{");
+ for_each_member(i, t, member) {
+ const struct btf_type *member_type = btf_type_by_id(btf,
+ member->type);
+ u32 member_offset = member->offset;
+ u32 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+ u8 bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
+ const struct btf_kind_operations *ops;
+
+ if (i)
+ seq_puts(m, seq);
+
+ ops = btf_type_ops(member_type);
+ ops->seq_show(btf, member_type, member->type,
+ data + bytes_offset, bits8_offset, m);
+ }
+ seq_puts(m, "}");
+}
+
+static struct btf_kind_operations struct_ops = {
+ .check_meta = btf_struct_check_meta,
+ .resolve = btf_struct_resolve,
+ .check_member = btf_struct_check_member,
+ .log_details = btf_struct_log,
+ .seq_show = btf_struct_seq_show,
+};
+
+static int btf_enum_check_member(struct btf_verifier_env *env,
+ const struct btf_type *struct_type,
+ const struct btf_member *member,
+ const struct btf_type *member_type)
+{
+ u32 struct_bits_off = member->offset;
+ u32 struct_size, bytes_offset;
+
+ if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member is not byte aligned");
+ return -EINVAL;
+ }
+
+ struct_size = struct_type->size;
+ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
+ if (struct_size - bytes_offset < sizeof(int)) {
+ btf_verifier_log_member(env, struct_type, member,
+ "Member exceeds struct_size");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static s32 btf_enum_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_enum *enums = btf_type_enum(t);
+ struct btf *btf = env->btf;
+ u16 i, nr_enums;
+ u32 meta_needed;
+
+ nr_enums = btf_type_vlen(t);
+ meta_needed = nr_enums * sizeof(*enums);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (t->size != sizeof(int)) {
+ btf_verifier_log_type(env, t, "Expected size:%zu",
+ sizeof(int));
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ for (i = 0; i < nr_enums; i++) {
+ if (!btf_name_offset_valid(btf, enums[i].name_off)) {
+ btf_verifier_log(env, "\tInvalid name_offset:%u",
+ enums[i].name_off);
+ return -EINVAL;
+ }
+
+ btf_verifier_log(env, "\t%s val=%d\n",
+ btf_name_by_offset(btf, enums[i].name_off),
+ enums[i].val);
+ }
+
+ return meta_needed;
+}
+
+static void btf_enum_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ const struct btf_enum *enums = btf_type_enum(t);
+ u32 i, nr_enums = btf_type_vlen(t);
+ int v = *(int *)data;
+
+ for (i = 0; i < nr_enums; i++) {
+ if (v == enums[i].val) {
+ seq_printf(m, "%s",
+ btf_name_by_offset(btf, enums[i].name_off));
+ return;
+ }
+ }
+
+ seq_printf(m, "%d", v);
+}
+
+static struct btf_kind_operations enum_ops = {
+ .check_meta = btf_enum_check_meta,
+ .resolve = btf_df_resolve,
+ .check_member = btf_enum_check_member,
+ .log_details = btf_enum_log,
+ .seq_show = btf_enum_seq_show,
+};
+
+static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
+ [BTF_KIND_INT] = &int_ops,
+ [BTF_KIND_PTR] = &ptr_ops,
+ [BTF_KIND_ARRAY] = &array_ops,
+ [BTF_KIND_STRUCT] = &struct_ops,
+ [BTF_KIND_UNION] = &struct_ops,
+ [BTF_KIND_ENUM] = &enum_ops,
+ [BTF_KIND_FWD] = &fwd_ops,
+ [BTF_KIND_TYPEDEF] = &modifier_ops,
+ [BTF_KIND_VOLATILE] = &modifier_ops,
+ [BTF_KIND_CONST] = &modifier_ops,
+ [BTF_KIND_RESTRICT] = &modifier_ops,
+};
+
+static s32 btf_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ u32 saved_meta_left = meta_left;
+ s32 var_meta_size;
+
+ if (meta_left < sizeof(*t)) {
+ btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu",
+ env->log_type_id, meta_left, sizeof(*t));
+ return -EINVAL;
+ }
+ meta_left -= sizeof(*t);
+
+ if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
+ BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
+ btf_verifier_log(env, "[%u] Invalid kind:%u",
+ env->log_type_id, BTF_INFO_KIND(t->info));
+ return -EINVAL;
+ }
+
+ if (!btf_name_offset_valid(env->btf, t->name_off)) {
+ btf_verifier_log(env, "[%u] Invalid name_offset:%u",
+ env->log_type_id, t->name_off);
+ return -EINVAL;
+ }
+
+ var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left);
+ if (var_meta_size < 0)
+ return var_meta_size;
+
+ meta_left -= var_meta_size;
+
+ return saved_meta_left - meta_left;
+}
+
+static int btf_check_all_metas(struct btf_verifier_env *env)
+{
+ struct btf *btf = env->btf;
+ struct btf_header *hdr;
+ void *cur, *end;
+
+ hdr = btf->hdr;
+ cur = btf->nohdr_data + hdr->type_off;
+ end = btf->nohdr_data + hdr->str_off;
+
+ env->log_type_id = 1;
+ while (cur < end) {
+ struct btf_type *t = cur;
+ s32 meta_size;
+
+ meta_size = btf_check_meta(env, t, end - cur);
+ if (meta_size < 0)
+ return meta_size;
+
+ btf_add_type(env, t);
+ cur += meta_size;
+ env->log_type_id++;
+ }
+
+ return 0;
+}
+
+static int btf_resolve(struct btf_verifier_env *env,
+ const struct btf_type *t, u32 type_id)
+{
+ const struct resolve_vertex *v;
+ int err = 0;
+
+ env->resolve_mode = RESOLVE_TBD;
+ env_stack_push(env, t, type_id);
+ while (!err && (v = env_stack_peak(env))) {
+ env->log_type_id = v->type_id;
+ err = btf_type_ops(v->t)->resolve(env, v);
+ }
+
+ env->log_type_id = type_id;
+ if (err == -E2BIG)
+ btf_verifier_log_type(env, t,
+ "Exceeded max resolving depth:%u",
+ MAX_RESOLVE_DEPTH);
+ else if (err == -EEXIST)
+ btf_verifier_log_type(env, t, "Loop detected");
+
+ return err;
+}
+
+static bool btf_resolve_valid(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 type_id)
+{
+ struct btf *btf = env->btf;
+
+ if (!env_type_is_resolved(env, type_id))
+ return false;
+
+ if (btf_type_is_struct(t))
+ return !btf->resolved_ids[type_id] &&
+ !btf->resolved_sizes[type_id];
+
+ if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) {
+ t = btf_type_id_resolve(btf, &type_id);
+ return t && !btf_type_is_modifier(t);
+ }
+
+ if (btf_type_is_array(t)) {
+ const struct btf_array *array = btf_type_array(t);
+ const struct btf_type *elem_type;
+ u32 elem_type_id = array->type;
+ u32 elem_size;
+
+ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
+ return elem_type && !btf_type_is_modifier(elem_type) &&
+ (array->nelems * elem_size ==
+ btf->resolved_sizes[type_id]);
+ }
+
+ return false;
+}
+
+static int btf_check_all_types(struct btf_verifier_env *env)
+{
+ struct btf *btf = env->btf;
+ u32 type_id;
+ int err;
+
+ err = env_resolve_init(env);
+ if (err)
+ return err;
+
+ env->phase++;
+ for (type_id = 1; type_id <= btf->nr_types; type_id++) {
+ const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+ env->log_type_id = type_id;
+ if (btf_type_needs_resolve(t) &&
+ !env_type_is_resolved(env, type_id)) {
+ err = btf_resolve(env, t, type_id);
+ if (err)
+ return err;
+ }
+
+ if (btf_type_needs_resolve(t) &&
+ !btf_resolve_valid(env, t, type_id)) {
+ btf_verifier_log_type(env, t, "Invalid resolve state");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int btf_parse_type_sec(struct btf_verifier_env *env)
+{
+ int err;
+
+ err = btf_check_all_metas(env);
+ if (err)
+ return err;
+
+ return btf_check_all_types(env);
+}
+
+static int btf_parse_str_sec(struct btf_verifier_env *env)
+{
+ const struct btf_header *hdr;
+ struct btf *btf = env->btf;
+ const char *start, *end;
+
+ hdr = btf->hdr;
+ start = btf->nohdr_data + hdr->str_off;
+ end = start + hdr->str_len;
+
+ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+ start[0] || end[-1]) {
+ btf_verifier_log(env, "Invalid string section");
+ return -EINVAL;
+ }
+
+ btf->strings = start;
+
+ return 0;
+}
+
+static int btf_parse_hdr(struct btf_verifier_env *env)
+{
+ const struct btf_header *hdr;
+ struct btf *btf = env->btf;
+ u32 meta_left;
+
+ if (btf->data_size < sizeof(*hdr)) {
+ btf_verifier_log(env, "btf_header not found");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_hdr(env);
+
+ hdr = btf->hdr;
+ if (hdr->magic != BTF_MAGIC) {
+ btf_verifier_log(env, "Invalid magic");
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ btf_verifier_log(env, "Unsupported version");
+ return -ENOTSUPP;
+ }
+
+ if (hdr->flags) {
+ btf_verifier_log(env, "Unsupported flags");
+ return -ENOTSUPP;
+ }
+
+ meta_left = btf->data_size - sizeof(*hdr);
+ if (!meta_left) {
+ btf_verifier_log(env, "No data");
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
+ /* Type section must align to 4 bytes */
+ hdr->type_off & (sizeof(u32) - 1)) {
+ btf_verifier_log(env, "Invalid type_off");
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->str_off ||
+ meta_left - hdr->str_off < hdr->str_len) {
+ btf_verifier_log(env, "Invalid str_off or str_len");
+ return -EINVAL;
+ }
+
+ btf->nohdr_data = btf->hdr + 1;
+
+ return 0;
+}
+
+static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
+ u32 log_level, char __user *log_ubuf, u32 log_size)
+{
+ struct btf_verifier_env *env = NULL;
+ struct bpf_verifier_log *log;
+ struct btf *btf = NULL;
+ u8 *data;
+ int err;
+
+ if (btf_data_size > BTF_MAX_SIZE)
+ return ERR_PTR(-E2BIG);
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+ if (!env)
+ return ERR_PTR(-ENOMEM);
+
+ log = &env->log;
+ if (log_level || log_ubuf || log_size) {
+ /* user requested verbose verifier output
+ * and supplied buffer to store the verification trace
+ */
+ log->level = log_level;
+ log->ubuf = log_ubuf;
+ log->len_total = log_size;
+
+ /* log attributes have to be sane */
+ if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
+ !log->level || !log->ubuf) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+
+ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+ if (!btf) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
+ if (!data) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ btf->data = data;
+ btf->data_size = btf_data_size;
+
+ if (copy_from_user(data, btf_data, btf_data_size)) {
+ err = -EFAULT;
+ goto errout;
+ }
+
+ env->btf = btf;
+
+ err = btf_parse_hdr(env);
+ if (err)
+ goto errout;
+
+ err = btf_parse_str_sec(env);
+ if (err)
+ goto errout;
+
+ err = btf_parse_type_sec(env);
+ if (err)
+ goto errout;
+
+ if (!err && log->level && bpf_verifier_log_full(log)) {
+ err = -ENOSPC;
+ goto errout;
+ }
+
+ if (!err) {
+ btf_verifier_env_free(env);
+ btf_get(btf);
+ return btf;
+ }
+
+errout:
+ btf_verifier_env_free(env);
+ if (btf)
+ btf_free(btf);
+ return ERR_PTR(err);
+}
+
+void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
+ struct seq_file *m)
+{
+ const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+ btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m);
+}
+
+static int btf_release(struct inode *inode, struct file *filp)
+{
+ btf_put(filp->private_data);
+ return 0;
+}
+
+const struct file_operations btf_fops = {
+ .release = btf_release,
+};
+
+int btf_new_fd(const union bpf_attr *attr)
+{
+ struct btf *btf;
+ int fd;
+
+ btf = btf_parse(u64_to_user_ptr(attr->btf),
+ attr->btf_size, attr->btf_log_level,
+ u64_to_user_ptr(attr->btf_log_buf),
+ attr->btf_log_size);
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ fd = anon_inode_getfd("btf", &btf_fops, btf,
+ O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ btf_put(btf);
+
+ return fd;
+}
+
+struct btf *btf_get_by_fd(int fd)
+{
+ struct btf *btf;
+ struct fd f;
+
+ f = fdget(fd);
+
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ if (f.file->f_op != &btf_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ btf = f.file->private_data;
+ btf_get(btf);
+ fdput(f);
+
+ return btf;
+}
+
+int btf_get_info_by_fd(const struct btf *btf,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ void __user *udata = u64_to_user_ptr(attr->info.info);
+ u32 copy_len = min_t(u32, btf->data_size,
+ attr->info.info_len);
+
+ if (copy_to_user(udata, btf->data, copy_len) ||
+ put_user(btf->data_size, &uattr->info.info_len))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a4bb0b34375a..c95b04ec103e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -19,6 +19,7 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>
+#include <net/xdp.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
@@ -137,27 +138,6 @@ free_cmap:
return ERR_PTR(err);
}
-static void __cpu_map_queue_destructor(void *ptr)
-{
- /* The tear-down procedure should have made sure that queue is
- * empty. See __cpu_map_entry_replace() and work-queue
- * invoked cpu_map_kthread_stop(). Catch any broken behaviour
- * gracefully and warn once.
- */
- if (WARN_ON_ONCE(ptr))
- page_frag_free(ptr);
-}
-
-static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
-{
- if (atomic_dec_and_test(&rcpu->refcnt)) {
- /* The queue should be empty at this point */
- ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
- kfree(rcpu->queue);
- kfree(rcpu);
- }
-}
-
static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
{
atomic_inc(&rcpu->refcnt);
@@ -179,45 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work)
kthread_stop(rcpu->kthread);
}
-/* For now, xdp_pkt is a cpumap internal data structure, with info
- * carried between enqueue to dequeue. It is mapped into the top
- * headroom of the packet, to avoid allocating separate mem.
- */
-struct xdp_pkt {
- void *data;
- u16 len;
- u16 headroom;
- u16 metasize;
- struct net_device *dev_rx;
-};
-
-/* Convert xdp_buff to xdp_pkt */
-static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
-{
- struct xdp_pkt *xdp_pkt;
- int metasize;
- int headroom;
-
- /* Assure headroom is available for storing info */
- headroom = xdp->data - xdp->data_hard_start;
- metasize = xdp->data - xdp->data_meta;
- metasize = metasize > 0 ? metasize : 0;
- if (unlikely((headroom - metasize) < sizeof(*xdp_pkt)))
- return NULL;
-
- /* Store info in top of packet */
- xdp_pkt = xdp->data_hard_start;
-
- xdp_pkt->data = xdp->data;
- xdp_pkt->len = xdp->data_end - xdp->data;
- xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
- xdp_pkt->metasize = metasize;
-
- return xdp_pkt;
-}
-
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
- struct xdp_pkt *xdp_pkt)
+ struct xdp_frame *xdpf)
{
unsigned int frame_size;
void *pkt_data_start;
@@ -232,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* would be preferred to set frame_size to 2048 or 4096
* depending on the driver.
* frame_size = 2048;
- * frame_len = frame_size - sizeof(*xdp_pkt);
+ * frame_len = frame_size - sizeof(*xdp_frame);
*
* Instead, with info avail, skb_shared_info in placed after
* packet len. This, unfortunately fakes the truesize.
@@ -240,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* is not at a fixed memory location, with mixed length
* packets, which is bad for cache-line hotness.
*/
- frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
+ frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
+ pkt_data_start = xdpf->data - xdpf->headroom;
skb = build_skb(pkt_data_start, frame_size);
if (!skb)
return NULL;
- skb_reserve(skb, xdp_pkt->headroom);
- __skb_put(skb, xdp_pkt->len);
- if (xdp_pkt->metasize)
- skb_metadata_set(skb, xdp_pkt->metasize);
+ skb_reserve(skb, xdpf->headroom);
+ __skb_put(skb, xdpf->len);
+ if (xdpf->metasize)
+ skb_metadata_set(skb, xdpf->metasize);
/* Essential SKB info: protocol and skb->dev */
- skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
+ skb->protocol = eth_type_trans(skb, xdpf->dev_rx);
/* Optional SKB info, currently missing:
* - HW checksum info (skb->ip_summed)
@@ -265,6 +208,31 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
return skb;
}
+static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+{
+ /* The tear-down procedure should have made sure that queue is
+ * empty. See __cpu_map_entry_replace() and work-queue
+ * invoked cpu_map_kthread_stop(). Catch any broken behaviour
+ * gracefully and warn once.
+ */
+ struct xdp_frame *xdpf;
+
+ while ((xdpf = ptr_ring_consume(ring)))
+ if (WARN_ON_ONCE(xdpf))
+ xdp_return_frame(xdpf);
+}
+
+static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+{
+ if (atomic_dec_and_test(&rcpu->refcnt)) {
+ /* The queue should be empty at this point */
+ __cpu_map_ring_cleanup(rcpu->queue);
+ ptr_ring_cleanup(rcpu->queue, NULL);
+ kfree(rcpu->queue);
+ kfree(rcpu);
+ }
+}
+
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
@@ -278,7 +246,7 @@ static int cpu_map_kthread_run(void *data)
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
unsigned int processed = 0, drops = 0, sched = 0;
- struct xdp_pkt *xdp_pkt;
+ struct xdp_frame *xdpf;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@@ -301,13 +269,13 @@ static int cpu_map_kthread_run(void *data)
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
- while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
+ while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
struct sk_buff *skb;
int ret;
- skb = cpu_map_build_skb(rcpu, xdp_pkt);
+ skb = cpu_map_build_skb(rcpu, xdpf);
if (!skb) {
- page_frag_free(xdp_pkt);
+ xdp_return_frame(xdpf);
continue;
}
@@ -604,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
spin_lock(&q->producer_lock);
for (i = 0; i < bq->count; i++) {
- void *xdp_pkt = bq->q[i];
+ struct xdp_frame *xdpf = bq->q[i];
int err;
- err = __ptr_ring_produce(q, xdp_pkt);
+ err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
- page_frag_free(xdp_pkt); /* Free xdp_pkt */
+ xdp_return_frame(xdpf);
}
processed++;
}
@@ -625,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
-static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
+static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
@@ -636,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
* driver to code invoking us to finished, due to driver
* (e.g. ixgbe) recycle tricks based on page-refcnt.
*
- * Thus, incoming xdp_pkt is always queued here (else we race
+ * Thus, incoming xdp_frame is always queued here (else we race
* with another CPU on page-refcnt and remaining driver code).
* Queue time is very short, as driver will invoke flush
* operation, when completing napi->poll call.
*/
- bq->q[bq->count++] = xdp_pkt;
+ bq->q[bq->count++] = xdpf;
return 0;
}
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
- struct xdp_pkt *xdp_pkt;
+ struct xdp_frame *xdpf;
- xdp_pkt = convert_to_xdp_pkt(xdp);
- if (unlikely(!xdp_pkt))
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
return -EOVERFLOW;
/* Info needed when constructing SKB on remote CPU */
- xdp_pkt->dev_rx = dev_rx;
+ xdpf->dev_rx = dev_rx;
- bq_enqueue(rcpu, xdp_pkt);
+ bq_enqueue(rcpu, xdpf);
return 0;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index bf6da59ae0d0..a41343009ccc 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return 0;
}
+struct map_iter {
+ void *key;
+ bool done;
+};
+
+static struct map_iter *map_iter(struct seq_file *m)
+{
+ return m->private;
+}
+
+static struct bpf_map *seq_file_to_map(struct seq_file *m)
+{
+ return file_inode(m->file)->i_private;
+}
+
+static void map_iter_free(struct map_iter *iter)
+{
+ if (iter) {
+ kfree(iter->key);
+ kfree(iter);
+ }
+}
+
+static struct map_iter *map_iter_alloc(struct bpf_map *map)
+{
+ struct map_iter *iter;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN);
+ if (!iter)
+ goto error;
+
+ iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN);
+ if (!iter->key)
+ goto error;
+
+ return iter;
+
+error:
+ map_iter_free(iter);
+ return NULL;
+}
+
+static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct bpf_map *map = seq_file_to_map(m);
+ void *key = map_iter(m)->key;
+
+ if (map_iter(m)->done)
+ return NULL;
+
+ if (unlikely(v == SEQ_START_TOKEN))
+ goto done;
+
+ if (map->ops->map_get_next_key(map, key, key)) {
+ map_iter(m)->done = true;
+ return NULL;
+ }
+
+done:
+ ++(*pos);
+ return key;
+}
+
+static void *map_seq_start(struct seq_file *m, loff_t *pos)
+{
+ if (map_iter(m)->done)
+ return NULL;
+
+ return *pos ? map_iter(m)->key : SEQ_START_TOKEN;
+}
+
+static void map_seq_stop(struct seq_file *m, void *v)
+{
+}
+
+static int map_seq_show(struct seq_file *m, void *v)
+{
+ struct bpf_map *map = seq_file_to_map(m);
+ void *key = map_iter(m)->key;
+
+ if (unlikely(v == SEQ_START_TOKEN)) {
+ seq_puts(m, "# WARNING!! The output is for debug purpose only\n");
+ seq_puts(m, "# WARNING!! The output format will change\n");
+ } else {
+ map->ops->map_seq_show_elem(map, key, m);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations bpffs_map_seq_ops = {
+ .start = map_seq_start,
+ .next = map_seq_next,
+ .show = map_seq_show,
+ .stop = map_seq_stop,
+};
+
+static int bpffs_map_open(struct inode *inode, struct file *file)
+{
+ struct bpf_map *map = inode->i_private;
+ struct map_iter *iter;
+ struct seq_file *m;
+ int err;
+
+ iter = map_iter_alloc(map);
+ if (!iter)
+ return -ENOMEM;
+
+ err = seq_open(file, &bpffs_map_seq_ops);
+ if (err) {
+ map_iter_free(iter);
+ return err;
+ }
+
+ m = file->private_data;
+ m->private = iter;
+
+ return 0;
+}
+
+static int bpffs_map_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+
+ map_iter_free(map_iter(m));
+
+ return seq_release(inode, file);
+}
+
+/* bpffs_map_fops should only implement the basic
+ * read operation for a BPF map. The purpose is to
+ * provide a simple user intuitive way to do
+ * "cat bpffs/pathto/a-pinned-map".
+ *
+ * Other operations (e.g. write, lookup...) should be realized by
+ * the userspace tools (e.g. bpftool) through the
+ * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update
+ * interface.
+ */
+static const struct file_operations bpffs_map_fops = {
+ .open = bpffs_map_open,
+ .read = seq_read,
+ .release = bpffs_map_release,
+};
+
static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
- const struct inode_operations *iops)
+ const struct inode_operations *iops,
+ const struct file_operations *fops)
{
struct inode *dir = dentry->d_parent->d_inode;
struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
@@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
return PTR_ERR(inode);
inode->i_op = iops;
+ inode->i_fop = fops;
inode->i_private = raw;
bpf_dentry_finalize(dentry, inode, dir);
@@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
{
- return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
+ return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL);
}
static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
{
- return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops);
+ struct bpf_map *map = arg;
+
+ return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
+ map->btf ? &bpffs_map_fops : NULL);
}
static struct dentry *
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ebfe9f29dae8..0bd2944eafb9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -11,6 +11,7 @@
*/
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
@@ -26,6 +27,7 @@
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
+#include <linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -250,6 +252,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
bpf_map_uncharge_memlock(map);
security_bpf_map_free(map);
+ btf_put(map->btf);
/* implementation dependent freeing */
map->ops->map_free(map);
}
@@ -415,7 +418,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
-#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -449,6 +452,33 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
+ if (bpf_map_support_seq_show(map) &&
+ (attr->btf_key_id || attr->btf_value_id)) {
+ struct btf *btf;
+
+ if (!attr->btf_key_id || !attr->btf_value_id) {
+ err = -EINVAL;
+ goto free_map_nouncharge;
+ }
+
+ btf = btf_get_by_fd(attr->btf_fd);
+ if (IS_ERR(btf)) {
+ err = PTR_ERR(btf);
+ goto free_map_nouncharge;
+ }
+
+ err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
+ attr->btf_value_id);
+ if (err) {
+ btf_put(btf);
+ goto free_map_nouncharge;
+ }
+
+ map->btf = btf;
+ map->btf_key_id = attr->btf_key_id;
+ map->btf_value_id = attr->btf_value_id;
+ }
+
err = security_bpf_map_alloc(map);
if (err)
goto free_map_nouncharge;
@@ -481,6 +511,7 @@ free_map:
free_map_sec:
security_bpf_map_free(map);
free_map_nouncharge:
+ btf_put(map->btf);
map->ops->map_free(map);
return err;
}
@@ -1883,6 +1914,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
info.load_time = prog->aux->load_time;
info.created_by_uid = from_kuid_munged(current_user_ns(),
prog->aux->user->uid);
+ info.gpl_compatible = prog->gpl_compatible;
memcpy(info.tag, prog->tag, sizeof(prog->tag));
memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
@@ -2016,6 +2048,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
else if (f.file->f_op == &bpf_map_fops)
err = bpf_map_get_info_by_fd(f.file->private_data, attr,
uattr);
+ else if (f.file->f_op == &btf_fops)
+ err = btf_get_info_by_fd(f.file->private_data, attr, uattr);
else
err = -EINVAL;
@@ -2023,6 +2057,19 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return err;
}
+#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
+
+static int bpf_btf_load(const union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_BTF_LOAD))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return btf_new_fd(attr);
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -2103,6 +2150,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_RAW_TRACEPOINT_OPEN:
err = bpf_raw_tracepoint_open(&attr);
break;
+ case BPF_BTF_LOAD:
+ err = bpf_btf_load(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5dd1dcb902bf..eb1a596aebd3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1914,7 +1914,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
- if (!type_is_pkt_pointer(type) &&
+ if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
type != expected_type)
goto err_type;
} else if (arg_type == ARG_CONST_SIZE ||
@@ -1966,14 +1966,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
verbose(env, "invalid map_ptr to access map->key\n");
return -EACCES;
}
- if (type_is_pkt_pointer(type))
- err = check_packet_access(env, regno, reg->off,
- meta->map_ptr->key_size,
- false);
- else
- err = check_stack_boundary(env, regno,
- meta->map_ptr->key_size,
- false, NULL);
+ err = check_helper_mem_access(env, regno,
+ meta->map_ptr->key_size, false,
+ NULL);
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
@@ -1983,14 +1978,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
- if (type_is_pkt_pointer(type))
- err = check_packet_access(env, regno, reg->off,
- meta->map_ptr->value_size,
- false);
- else
- err = check_stack_boundary(env, regno,
- meta->map_ptr->value_size,
- false, NULL);
+ err = check_helper_mem_access(env, regno,
+ meta->map_ptr->value_size, false,
+ NULL);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 15ea216a67ce..63d0816ab23b 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -22,6 +22,7 @@
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/uidgid.h>
#include <linux/uuid.h>
#include <linux/ctype.h>
#include <net/sock.h>
@@ -231,30 +232,6 @@ out:
return r;
}
-#ifdef CONFIG_NET
-static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
-{
- struct kobject *kobj = data, *ksobj;
- const struct kobj_ns_type_operations *ops;
-
- ops = kobj_ns_ops(kobj);
- if (!ops && kobj->kset) {
- ksobj = &kobj->kset->kobj;
- if (ksobj->parent != NULL)
- ops = kobj_ns_ops(ksobj->parent);
- }
-
- if (ops && ops->netlink_ns && kobj->ktype->namespace) {
- const void *sock_ns, *ns;
- ns = kobj->ktype->namespace(kobj);
- sock_ns = ops->netlink_ns(dsk);
- return sock_ns != ns;
- }
-
- return 0;
-}
-#endif
-
#ifdef CONFIG_UEVENT_HELPER
static int kobj_usermode_filter(struct kobject *kobj)
{
@@ -296,15 +273,44 @@ static void cleanup_uevent_env(struct subprocess_info *info)
}
#endif
-static int kobject_uevent_net_broadcast(struct kobject *kobj,
- struct kobj_uevent_env *env,
+#ifdef CONFIG_NET
+static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env,
const char *action_string,
const char *devpath)
{
- int retval = 0;
-#if defined(CONFIG_NET)
+ struct netlink_skb_parms *parms;
+ struct sk_buff *skb = NULL;
+ char *scratch;
+ size_t len;
+
+ /* allocate message with maximum possible size */
+ len = strlen(action_string) + strlen(devpath) + 2;
+ skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ /* add header */
+ scratch = skb_put(skb, len);
+ sprintf(scratch, "%s@%s", action_string, devpath);
+
+ skb_put_data(skb, env->buf, env->buflen);
+
+ parms = &NETLINK_CB(skb);
+ parms->creds.uid = GLOBAL_ROOT_UID;
+ parms->creds.gid = GLOBAL_ROOT_GID;
+ parms->dst_group = 1;
+ parms->portid = 0;
+
+ return skb;
+}
+
+static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env,
+ const char *action_string,
+ const char *devpath)
+{
struct sk_buff *skb = NULL;
struct uevent_sock *ue_sk;
+ int retval = 0;
/* send netlink message */
list_for_each_entry(ue_sk, &uevent_sock_list, list) {
@@ -314,37 +320,99 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
continue;
if (!skb) {
- /* allocate message with the maximum possible size */
- size_t len = strlen(action_string) + strlen(devpath) + 2;
- char *scratch;
-
retval = -ENOMEM;
- skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+ skb = alloc_uevent_skb(env, action_string, devpath);
if (!skb)
continue;
-
- /* add header */
- scratch = skb_put(skb, len);
- sprintf(scratch, "%s@%s", action_string, devpath);
-
- skb_put_data(skb, env->buf, env->buflen);
-
- NETLINK_CB(skb).dst_group = 1;
}
- retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb),
- 0, 1, GFP_KERNEL,
- kobj_bcast_filter,
- kobj);
+ retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1,
+ GFP_KERNEL);
/* ENOBUFS should be handled in userspace */
if (retval == -ENOBUFS || retval == -ESRCH)
retval = 0;
}
consume_skb(skb);
-#endif
+
return retval;
}
+static int uevent_net_broadcast_tagged(struct sock *usk,
+ struct kobj_uevent_env *env,
+ const char *action_string,
+ const char *devpath)
+{
+ struct user_namespace *owning_user_ns = sock_net(usk)->user_ns;
+ struct sk_buff *skb = NULL;
+ int ret = 0;
+
+ skb = alloc_uevent_skb(env, action_string, devpath);
+ if (!skb)
+ return -ENOMEM;
+
+ /* fix credentials */
+ if (owning_user_ns != &init_user_ns) {
+ struct netlink_skb_parms *parms = &NETLINK_CB(skb);
+ kuid_t root_uid;
+ kgid_t root_gid;
+
+ /* fix uid */
+ root_uid = make_kuid(owning_user_ns, 0);
+ if (uid_valid(root_uid))
+ parms->creds.uid = root_uid;
+
+ /* fix gid */
+ root_gid = make_kgid(owning_user_ns, 0);
+ if (gid_valid(root_gid))
+ parms->creds.gid = root_gid;
+ }
+
+ ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL);
+ /* ENOBUFS should be handled in userspace */
+ if (ret == -ENOBUFS || ret == -ESRCH)
+ ret = 0;
+
+ return ret;
+}
+#endif
+
+static int kobject_uevent_net_broadcast(struct kobject *kobj,
+ struct kobj_uevent_env *env,
+ const char *action_string,
+ const char *devpath)
+{
+ int ret = 0;
+
+#ifdef CONFIG_NET
+ const struct kobj_ns_type_operations *ops;
+ const struct net *net = NULL;
+
+ ops = kobj_ns_ops(kobj);
+ if (!ops && kobj->kset) {
+ struct kobject *ksobj = &kobj->kset->kobj;
+ if (ksobj->parent != NULL)
+ ops = kobj_ns_ops(ksobj->parent);
+ }
+
+ /* kobjects currently only carry network namespace tags and they
+ * are the only tag relevant here since we want to decide which
+ * network namespaces to broadcast the uevent into.
+ */
+ if (ops && ops->netlink_ns && kobj->ktype->namespace)
+ if (ops->type == KOBJ_NS_TYPE_NET)
+ net = kobj->ktype->namespace(kobj);
+
+ if (!net)
+ ret = uevent_net_broadcast_untagged(env, action_string,
+ devpath);
+ else
+ ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env,
+ action_string, devpath);
+#endif
+
+ return ret;
+}
+
static void zap_modalias_env(struct kobj_uevent_env *env)
{
static const char modalias_prefix[] = "MODALIAS=";
@@ -703,9 +771,13 @@ static int uevent_net_init(struct net *net)
net->uevent_sock = ue_sk;
- mutex_lock(&uevent_sock_mutex);
- list_add_tail(&ue_sk->list, &uevent_sock_list);
- mutex_unlock(&uevent_sock_mutex);
+ /* Restrict uevents to initial user namespace. */
+ if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+ mutex_lock(&uevent_sock_mutex);
+ list_add_tail(&ue_sk->list, &uevent_sock_list);
+ mutex_unlock(&uevent_sock_mutex);
+ }
+
return 0;
}
@@ -713,9 +785,11 @@ static void uevent_net_exit(struct net *net)
{
struct uevent_sock *ue_sk = net->uevent_sock;
- mutex_lock(&uevent_sock_mutex);
- list_del(&ue_sk->list);
- mutex_unlock(&uevent_sock_mutex);
+ if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+ mutex_lock(&uevent_sock_mutex);
+ list_del(&ue_sk->list);
+ mutex_unlock(&uevent_sock_mutex);
+ }
netlink_kernel_release(ue_sk->sk);
kfree(ue_sk);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 2b2b79974b61..9427b5766134 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
* For a completely stable walk you should construct your own data
* structure outside the hash table.
*
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
*
* You must call rhashtable_walk_exit after this function returns.
*/
@@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
__acquires(RCU)
{
struct rhashtable *ht = iter->ht;
+ bool rhlist = ht->rhlist;
rcu_read_lock();
@@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
list_del(&iter->walker.list);
spin_unlock(&ht->lock);
- if (!iter->walker.tbl && !iter->end_of_table) {
+ if (iter->end_of_table)
+ return 0;
+ if (!iter->walker.tbl) {
iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+ iter->slot = 0;
+ iter->skip = 0;
return -EAGAIN;
}
+ if (iter->p && !rhlist) {
+ /*
+ * We need to validate that 'p' is still in the table, and
+ * if so, update 'skip'
+ */
+ struct rhash_head *p;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ skip++;
+ if (p == iter->p) {
+ iter->skip = skip;
+ goto found;
+ }
+ }
+ iter->p = NULL;
+ } else if (iter->p && rhlist) {
+ /* Need to validate that 'list' is still in the table, and
+ * if so, update 'skip' and 'p'.
+ */
+ struct rhash_head *p;
+ struct rhlist_head *list;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ for (list = container_of(p, struct rhlist_head, rhead);
+ list;
+ list = rcu_dereference(list->next)) {
+ skip++;
+ if (list == iter->list) {
+ iter->p = p;
+ skip = skip;
+ goto found;
+ }
+ }
+ }
+ iter->p = NULL;
+ }
+found:
return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
@@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
iter->walker.tbl = NULL;
spin_unlock(&ht->lock);
- iter->p = NULL;
-
out:
rcu_read_unlock();
}
diff --git a/net/Kconfig b/net/Kconfig
index 0428f12c25c2..b62089fb1332 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -407,6 +407,9 @@ config GRO_CELLS
bool
default n
+config SOCK_VALIDATE_XMIT
+ bool
+
config NET_DEVLINK
tristate "Network physical/parent device Netlink interface"
help
@@ -423,6 +426,9 @@ config MAY_USE_DEVLINK
on MAY_USE_DEVLINK to ensure they do not cause link errors when
devlink is a loadable module and the driver using it is built-in.
+config PAGE_POOL
+ bool
+
endif # if NET
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2ced48662c1f..68c3578343b4 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.rxq = &rxqueue->xdp_rxq;
retval = bpf_test_run(prog, &xdp, repeat, &duration);
- if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+ if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
+ xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
kfree(data);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 671d13c10f6f..b0a0b82e2d91 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
+ bool notified = false;
bool changed_addr;
int err;
@@ -67,7 +68,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
case NETDEV_CHANGE:
- br_port_carrier_check(p);
+ br_port_carrier_check(p, &notified);
break;
case NETDEV_FEAT_CHANGE:
@@ -76,8 +77,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
case NETDEV_DOWN:
spin_lock_bh(&br->lock);
- if (br->dev->flags & IFF_UP)
+ if (br->dev->flags & IFF_UP) {
br_stp_disable_port(p);
+ notified = true;
+ }
spin_unlock_bh(&br->lock);
break;
@@ -85,6 +88,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
if (netif_running(br->dev) && netif_oper_up(dev)) {
spin_lock_bh(&br->lock);
br_stp_enable_port(p);
+ notified = true;
spin_unlock_bh(&br->lock);
}
break;
@@ -110,8 +114,8 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
}
/* Events that may cause spanning tree to refresh */
- if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
- event == NETDEV_CHANGE || event == NETDEV_DOWN)
+ if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+ event == NETDEV_CHANGE || event == NETDEV_DOWN))
br_ifinfo_notify(RTM_NEWLINK, NULL, p);
return NOTIFY_DONE;
@@ -141,7 +145,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
- fdb_info->vid);
+ fdb_info->vid, false);
if (err) {
err = notifier_from_errno(err);
break;
@@ -152,7 +156,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_DEL_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_del(br, p, fdb_info->addr,
- fdb_info->vid);
+ fdb_info->vid, false);
if (err)
err = notifier_from_errno(err);
break;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d9e69e4514be..b19e3104afd6 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -40,7 +40,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
static void fdb_notify(struct net_bridge *br,
- const struct net_bridge_fdb_entry *, int);
+ const struct net_bridge_fdb_entry *, int, bool);
int __init br_fdb_init(void)
{
@@ -121,6 +121,28 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
return fdb;
}
+struct net_device *br_fdb_find_port(const struct net_device *br_dev,
+ const unsigned char *addr,
+ __u16 vid)
+{
+ struct net_bridge_fdb_entry *f;
+ struct net_device *dev = NULL;
+ struct net_bridge *br;
+
+ ASSERT_RTNL();
+
+ if (!netif_is_bridge_master(br_dev))
+ return NULL;
+
+ br = netdev_priv(br_dev);
+ f = br_fdb_find(br, addr, vid);
+ if (f && f->dst)
+ dev = f->dst->dev;
+
+ return dev;
+}
+EXPORT_SYMBOL_GPL(br_fdb_find_port);
+
struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
const unsigned char *addr,
__u16 vid)
@@ -173,7 +195,8 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
}
}
-static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
+static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
+ bool swdev_notify)
{
trace_fdb_delete(br, f);
@@ -183,7 +206,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
hlist_del_init_rcu(&f->fdb_node);
rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
br_fdb_rht_params);
- fdb_notify(br, f, RTM_DELNEIGH);
+ fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
call_rcu(&f->rcu, fdb_rcu_free);
}
@@ -219,7 +242,7 @@ static void fdb_delete_local(struct net_bridge *br,
return;
}
- fdb_delete(br, f);
+ fdb_delete(br, f, true);
}
void br_fdb_find_delete_local(struct net_bridge *br,
@@ -334,7 +357,7 @@ void br_fdb_cleanup(struct work_struct *work)
} else {
spin_lock_bh(&br->hash_lock);
if (!hlist_unhashed(&f->fdb_node))
- fdb_delete(br, f);
+ fdb_delete(br, f, true);
spin_unlock_bh(&br->hash_lock);
}
}
@@ -354,7 +377,7 @@ void br_fdb_flush(struct net_bridge *br)
spin_lock_bh(&br->hash_lock);
hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
if (!f->is_static)
- fdb_delete(br, f);
+ fdb_delete(br, f, true);
}
spin_unlock_bh(&br->hash_lock);
}
@@ -383,7 +406,7 @@ void br_fdb_delete_by_port(struct net_bridge *br,
if (f->is_local)
fdb_delete_local(br, p, f);
else
- fdb_delete(br, f);
+ fdb_delete(br, f, true);
}
spin_unlock_bh(&br->hash_lock);
}
@@ -509,7 +532,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return 0;
br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
source ? source->dev->name : br->dev->name, addr, vid);
- fdb_delete(br, fdb);
+ fdb_delete(br, fdb, true);
}
fdb = fdb_create(br, source, addr, vid, 1, 1);
@@ -517,7 +540,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return -ENOMEM;
fdb_add_hw_addr(br, addr);
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, true);
return 0;
}
@@ -572,7 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->added_by_user = 1;
if (unlikely(fdb_modified)) {
trace_br_fdb_update(br, source, addr, vid, added_by_user);
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
}
} else {
@@ -583,7 +606,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->added_by_user = 1;
trace_br_fdb_update(br, source, addr, vid,
added_by_user);
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
/* else we lose race and someone else inserts
* it first, don't bother updating
@@ -665,13 +688,15 @@ static inline size_t fdb_nlmsg_size(void)
}
static void fdb_notify(struct net_bridge *br,
- const struct net_bridge_fdb_entry *fdb, int type)
+ const struct net_bridge_fdb_entry *fdb, int type,
+ bool swdev_notify)
{
struct net *net = dev_net(br->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- br_switchdev_fdb_notify(fdb, type);
+ if (swdev_notify)
+ br_switchdev_fdb_notify(fdb, type);
skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
@@ -810,7 +835,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
fdb->used = jiffies;
if (modified) {
fdb->updated = jiffies;
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
return 0;
@@ -834,7 +859,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
rcu_read_unlock();
local_bh_enable();
} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
- err = br_fdb_external_learn_add(br, p, addr, vid);
+ err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm->ndm_state,
@@ -923,7 +948,7 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br,
if (!fdb || fdb->dst != p)
return -ENOENT;
- fdb_delete(br, fdb);
+ fdb_delete(br, fdb, true);
return 0;
}
@@ -1043,7 +1068,8 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid,
+ bool swdev_notify)
{
struct net_bridge_fdb_entry *fdb;
bool modified = false;
@@ -1061,7 +1087,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
goto err_unlock;
}
fdb->added_by_external_learn = 1;
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
} else {
fdb->updated = jiffies;
@@ -1080,7 +1106,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
}
if (modified)
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+ fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
}
err_unlock:
@@ -1090,7 +1116,8 @@ err_unlock:
}
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid,
+ bool swdev_notify)
{
struct net_bridge_fdb_entry *fdb;
int err = 0;
@@ -1099,7 +1126,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
fdb = br_fdb_find(br, addr, vid);
if (fdb && fdb->added_by_external_learn)
- fdb_delete(br, fdb);
+ fdb_delete(br, fdb, swdev_notify);
else
err = -ENOENT;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index b4eed113d2ec..7a7fd672ccf2 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -274,8 +274,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge_port *port, *lport, *rport;
lport = p ? p->port : NULL;
- rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
- NULL;
+ rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
if ((unsigned long)lport > (unsigned long)rport) {
port = lport;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 5bb6681fa91e..05e42d86882d 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -64,7 +64,7 @@ static int port_cost(struct net_device *dev)
/* Check for port carrier transitions. */
-void br_port_carrier_check(struct net_bridge_port *p)
+void br_port_carrier_check(struct net_bridge_port *p, bool *notified)
{
struct net_device *dev = p->dev;
struct net_bridge *br = p->br;
@@ -73,16 +73,21 @@ void br_port_carrier_check(struct net_bridge_port *p)
netif_running(dev) && netif_oper_up(dev))
p->path_cost = port_cost(dev);
+ *notified = false;
if (!netif_running(br->dev))
return;
spin_lock_bh(&br->lock);
if (netif_running(dev) && netif_oper_up(dev)) {
- if (p->state == BR_STATE_DISABLED)
+ if (p->state == BR_STATE_DISABLED) {
br_stp_enable_port(p);
+ *notified = true;
+ }
} else {
- if (p->state != BR_STATE_DISABLED)
+ if (p->state != BR_STATE_DISABLED) {
br_stp_disable_port(p);
+ *notified = true;
+ }
}
spin_unlock_bh(&br->lock);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a7cb3ece5031..742f40aefdaf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -553,9 +553,11 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid);
+ const unsigned char *addr, u16 vid,
+ bool swdev_notify);
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid);
+ const unsigned char *addr, u16 vid,
+ bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
@@ -573,7 +575,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
/* br_if.c */
-void br_port_carrier_check(struct net_bridge_port *p);
+void br_port_carrier_check(struct net_bridge_port *p, bool *notified);
int br_add_bridge(struct net *net, const char *name);
int br_del_bridge(struct net *net, const char *name);
int br_add_if(struct net_bridge *br, struct net_device *dev,
@@ -594,11 +596,22 @@ static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
return rcu_dereference(dev->rx_handler) == br_handle_frame;
}
+static inline bool br_rx_handler_check_rtnl(const struct net_device *dev)
+{
+ return rcu_dereference_rtnl(dev->rx_handler) == br_handle_frame;
+}
+
static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev)
{
return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL;
}
+static inline struct net_bridge_port *
+br_port_get_check_rtnl(const struct net_device *dev)
+{
+ return br_rx_handler_check_rtnl(dev) ? br_port_get_rtnl_rcu(dev) : NULL;
+}
+
/* br_ioctl.c */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee775f4ff76c..35474d49555d 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -102,13 +102,15 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
static void
br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
- u16 vid, struct net_device *dev)
+ u16 vid, struct net_device *dev,
+ bool added_by_user)
{
struct switchdev_notifier_fdb_info info;
unsigned long notifier_type;
info.addr = mac;
info.vid = vid;
+ info.added_by_user = added_by_user;
notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
call_switchdev_notifiers(notifier_type, dev, &info.info);
}
@@ -116,19 +118,21 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
void
br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
{
- if (!fdb->added_by_user || !fdb->dst)
+ if (!fdb->dst)
return;
switch (type) {
case RTM_DELNEIGH:
br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
fdb->key.vlan_id,
- fdb->dst->dev);
+ fdb->dst->dev,
+ fdb->added_by_user);
break;
case RTM_NEWNEIGH:
br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
fdb->key.vlan_id,
- fdb->dst->dev);
+ fdb->dst->dev,
+ fdb->added_by_user);
break;
}
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 9896f4975353..df37a5137c25 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1149,3 +1149,42 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
stats->tx_packets += txpackets;
}
}
+
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
+{
+ struct net_bridge_vlan_group *vg;
+
+ ASSERT_RTNL();
+ if (netif_is_bridge_master(dev))
+ vg = br_vlan_group(netdev_priv(dev));
+ else
+ return -EINVAL;
+
+ *p_pvid = br_get_pvid(vg);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_pvid);
+
+int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct net_bridge_port *p;
+
+ ASSERT_RTNL();
+ p = br_port_get_check_rtnl(dev);
+ if (p)
+ vg = nbp_vlan_group(p);
+ else
+ return -EINVAL;
+
+ v = br_vlan_find(vg, vid);
+ if (!v)
+ return -ENOENT;
+
+ p_vinfo->vid = vid;
+ p_vinfo->flags = v->flags;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_info);
diff --git a/net/core/Makefile b/net/core/Makefile
index 6dbbba8c57ae..7080417f8bc8 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -14,6 +14,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
fib_notifier.o xdp.o
obj-y += net-sysfs.o
+obj-$(CONFIG_PAGE_POOL) += page_pool.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/dev.c b/net/core/dev.c
index af0558b00c6c..bb81a6e1d354 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1285,6 +1285,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return len;
}
+EXPORT_SYMBOL(dev_set_alias);
/**
* dev_get_alias - get ifalias of a device
@@ -1586,7 +1587,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
- };
+ }
#undef N
return "UNKNOWN_NETDEV_EVENT";
}
@@ -2614,17 +2615,16 @@ EXPORT_SYMBOL(netif_device_attach);
* Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range.
*/
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues)
+static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
{
u32 hash;
u16 qoffset = 0;
- u16 qcount = num_tx_queues;
+ u16 qcount = dev->real_num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
- while (unlikely(hash >= num_tx_queues))
- hash -= num_tx_queues;
+ while (unlikely(hash >= qcount))
+ hash -= qcount;
return hash;
}
@@ -2637,7 +2637,6 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
-EXPORT_SYMBOL(__skb_tx_hash);
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
@@ -3113,6 +3112,10 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
+ skb = sk_validate_xmit_skb(skb, dev);
+ if (unlikely(!skb))
+ goto out_null;
+
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs;
@@ -3996,9 +3999,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
struct netdev_rx_queue *rxqueue;
+ void *orig_data, *orig_data_end;
u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
- void *orig_data;
int hlen, off;
u32 mac_len;
@@ -4037,6 +4040,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
+ orig_data_end = xdp.data_end;
orig_data = xdp.data;
rxqueue = netif_get_rxqueue(skb);
@@ -4051,6 +4055,15 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
__skb_push(skb, -off);
skb->mac_header += off;
+ /* check if bpf_xdp_adjust_tail was used. it can only "shrink"
+ * pckt.
+ */
+ off = orig_data_end - xdp.data_end;
+ if (off != 0) {
+ skb_set_tail_pointer(skb, xdp.data_end - xdp.data);
+ skb->len -= off;
+ }
+
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
@@ -7870,6 +7883,8 @@ int register_netdevice(struct net_device *dev)
int ret;
struct net *net = dev_net(dev);
+ BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
+ NETDEV_FEATURE_COUNT);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
diff --git a/net/core/dst.c b/net/core/dst.c
index 007aa0b08291..2d9b37f8944a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = {
*/
.refcnt = REFCOUNT_INIT(1),
};
+EXPORT_SYMBOL(dst_default_metrics);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ba02f0dfe85c..c15075dc7572 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -92,6 +92,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
+ [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
@@ -109,6 +110,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
[NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
+ [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
};
static const char
@@ -210,23 +212,6 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
return ret;
}
-static int phy_get_sset_count(struct phy_device *phydev)
-{
- int ret;
-
- if (phydev->drv->get_sset_count &&
- phydev->drv->get_strings &&
- phydev->drv->get_stats) {
- mutex_lock(&phydev->lock);
- ret = phydev->drv->get_sset_count(phydev);
- mutex_unlock(&phydev->lock);
-
- return ret;
- }
-
- return -EOPNOTSUPP;
-}
-
static int __ethtool_get_sset_count(struct net_device *dev, int sset)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -243,12 +228,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
if (sset == ETH_SS_PHY_TUNABLES)
return ARRAY_SIZE(phy_tunable_strings);
- if (sset == ETH_SS_PHY_STATS) {
- if (dev->phydev)
- return phy_get_sset_count(dev->phydev);
- else
- return -EOPNOTSUPP;
- }
+ if (sset == ETH_SS_PHY_STATS && dev->phydev &&
+ !ops->get_ethtool_phy_stats)
+ return phy_ethtool_get_sset_count(dev->phydev);
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
@@ -271,17 +253,10 @@ static void __ethtool_get_strings(struct net_device *dev,
memcpy(data, tunable_strings, sizeof(tunable_strings));
else if (stringset == ETH_SS_PHY_TUNABLES)
memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
- else if (stringset == ETH_SS_PHY_STATS) {
- struct phy_device *phydev = dev->phydev;
-
- if (phydev) {
- mutex_lock(&phydev->lock);
- phydev->drv->get_strings(phydev, data);
- mutex_unlock(&phydev->lock);
- } else {
- return;
- }
- } else
+ else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
+ !ops->get_ethtool_phy_stats)
+ phy_ethtool_get_strings(dev->phydev, data);
+ else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
}
@@ -1998,15 +1973,19 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
{
- struct ethtool_stats stats;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ struct ethtool_stats stats;
u64 *data;
int ret, n_stats;
- if (!phydev)
+ if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
return -EOPNOTSUPP;
- n_stats = phy_get_sset_count(phydev);
+ if (dev->phydev && !ops->get_ethtool_phy_stats)
+ n_stats = phy_ethtool_get_sset_count(dev->phydev);
+ else
+ n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
if (n_stats < 0)
return n_stats;
if (n_stats > S32_MAX / sizeof(u64))
@@ -2021,9 +2000,13 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (n_stats && !data)
return -ENOMEM;
- mutex_lock(&phydev->lock);
- phydev->drv->get_stats(phydev, &stats, data);
- mutex_unlock(&phydev->lock);
+ if (dev->phydev && !ops->get_ethtool_phy_stats) {
+ ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
+ if (ret < 0)
+ return ret;
+ } else {
+ ops->get_ethtool_phy_stats(dev, &stats, data);
+ }
ret = -EFAULT;
if (copy_to_user(useraddr, &stats, sizeof(stats)))
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 33958f84c173..126ffc5bc630 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -387,247 +387,304 @@ unsigned int fib_rules_seq_read(struct net *net, int family)
}
EXPORT_SYMBOL_GPL(fib_rules_seq_read);
-static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
- struct fib_rules_ops *ops)
-{
- int err = -EINVAL;
-
- if (frh->src_len)
- if (tb[FRA_SRC] == NULL ||
- frh->src_len > (ops->addr_size * 8) ||
- nla_len(tb[FRA_SRC]) != ops->addr_size)
- goto errout;
-
- if (frh->dst_len)
- if (tb[FRA_DST] == NULL ||
- frh->dst_len > (ops->addr_size * 8) ||
- nla_len(tb[FRA_DST]) != ops->addr_size)
- goto errout;
-
- err = 0;
-errout:
- return err;
-}
-
-static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
- struct nlattr **tb, struct fib_rule *rule)
+static struct fib_rule *rule_find(struct fib_rules_ops *ops,
+ struct fib_rule_hdr *frh,
+ struct nlattr **tb,
+ struct fib_rule *rule,
+ bool user_priority)
{
struct fib_rule *r;
list_for_each_entry(r, &ops->rules_list, list) {
- if (r->action != rule->action)
+ if (rule->action && r->action != rule->action)
continue;
- if (r->table != rule->table)
+ if (rule->table && r->table != rule->table)
continue;
- if (r->pref != rule->pref)
+ if (user_priority && r->pref != rule->pref)
continue;
- if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+ if (rule->iifname[0] &&
+ memcmp(r->iifname, rule->iifname, IFNAMSIZ))
continue;
- if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+ if (rule->oifname[0] &&
+ memcmp(r->oifname, rule->oifname, IFNAMSIZ))
continue;
- if (r->mark != rule->mark)
+ if (rule->mark && r->mark != rule->mark)
continue;
- if (r->mark_mask != rule->mark_mask)
+ if (rule->mark_mask && r->mark_mask != rule->mark_mask)
continue;
- if (r->tun_id != rule->tun_id)
+ if (rule->tun_id && r->tun_id != rule->tun_id)
continue;
if (r->fr_net != rule->fr_net)
continue;
- if (r->l3mdev != rule->l3mdev)
+ if (rule->l3mdev && r->l3mdev != rule->l3mdev)
continue;
- if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
- !uid_eq(r->uid_range.end, rule->uid_range.end))
+ if (uid_range_set(&rule->uid_range) &&
+ (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+ !uid_eq(r->uid_range.end, rule->uid_range.end)))
continue;
- if (r->ip_proto != rule->ip_proto)
+ if (rule->ip_proto && r->ip_proto != rule->ip_proto)
continue;
- if (!fib_rule_port_range_compare(&r->sport_range,
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_range_compare(&r->sport_range,
&rule->sport_range))
continue;
- if (!fib_rule_port_range_compare(&r->dport_range,
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_range_compare(&r->dport_range,
&rule->dport_range))
continue;
if (!ops->compare(r, frh, tb))
continue;
- return 1;
+ return r;
+ }
+
+ return NULL;
+}
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+ struct netlink_ext_ack *extack)
+{
+ nlrule->l3mdev = nla_get_u8(nla);
+ if (nlrule->l3mdev != 1) {
+ NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
+ return -1;
}
+
return 0;
}
+#else
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
+ return -1;
+}
+#endif
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct fib_rules_ops *ops,
+ struct nlattr *tb[],
+ struct fib_rule **rule,
+ bool *user_priority)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
- struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule, *r, *last = NULL;
- struct nlattr *tb[FRA_MAX+1];
- int err = -EINVAL, unresolved = 0;
-
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
- goto errout;
+ struct fib_rule *nlrule = NULL;
+ int err = -EINVAL;
- ops = lookup_rules_ops(net, frh->family);
- if (ops == NULL) {
- err = -EAFNOSUPPORT;
- goto errout;
+ if (frh->src_len)
+ if (!tb[FRA_SRC] ||
+ frh->src_len > (ops->addr_size * 8) ||
+ nla_len(tb[FRA_SRC]) != ops->addr_size) {
+ NL_SET_ERR_MSG(extack, "Invalid source address");
+ goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
- if (err < 0)
- goto errout;
-
- err = validate_rulemsg(frh, tb, ops);
- if (err < 0)
- goto errout;
+ if (frh->dst_len)
+ if (!tb[FRA_DST] ||
+ frh->dst_len > (ops->addr_size * 8) ||
+ nla_len(tb[FRA_DST]) != ops->addr_size) {
+ NL_SET_ERR_MSG(extack, "Invalid dst address");
+ goto errout;
+ }
- rule = kzalloc(ops->rule_size, GFP_KERNEL);
- if (rule == NULL) {
+ nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+ if (!nlrule) {
err = -ENOMEM;
goto errout;
}
- refcount_set(&rule->refcnt, 1);
- rule->fr_net = net;
+ refcount_set(&nlrule->refcnt, 1);
+ nlrule->fr_net = net;
- rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
- : fib_default_rule_pref(ops);
+ if (tb[FRA_PRIORITY]) {
+ nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+ *user_priority = true;
+ } else {
+ nlrule->pref = fib_default_rule_pref(ops);
+ }
- rule->proto = tb[FRA_PROTOCOL] ?
+ nlrule->proto = tb[FRA_PROTOCOL] ?
nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
- rule->iifindex = -1;
- nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(net, rule->iifname);
+ nlrule->iifindex = -1;
+ nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
+ dev = __dev_get_by_name(net, nlrule->iifname);
if (dev)
- rule->iifindex = dev->ifindex;
+ nlrule->iifindex = dev->ifindex;
}
if (tb[FRA_OIFNAME]) {
struct net_device *dev;
- rule->oifindex = -1;
- nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(net, rule->oifname);
+ nlrule->oifindex = -1;
+ nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
+ dev = __dev_get_by_name(net, nlrule->oifname);
if (dev)
- rule->oifindex = dev->ifindex;
+ nlrule->oifindex = dev->ifindex;
}
if (tb[FRA_FWMARK]) {
- rule->mark = nla_get_u32(tb[FRA_FWMARK]);
- if (rule->mark)
+ nlrule->mark = nla_get_u32(tb[FRA_FWMARK]);
+ if (nlrule->mark)
/* compatibility: if the mark value is non-zero all bits
* are compared unless a mask is explicitly specified.
*/
- rule->mark_mask = 0xFFFFFFFF;
+ nlrule->mark_mask = 0xFFFFFFFF;
}
if (tb[FRA_FWMASK])
- rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+ nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
if (tb[FRA_TUN_ID])
- rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+ nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
err = -EINVAL;
- if (tb[FRA_L3MDEV]) {
-#ifdef CONFIG_NET_L3_MASTER_DEV
- rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
- if (rule->l3mdev != 1)
-#endif
- goto errout_free;
- }
+ if (tb[FRA_L3MDEV] &&
+ fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
+ goto errout_free;
- rule->action = frh->action;
- rule->flags = frh->flags;
- rule->table = frh_get_table(frh, tb);
+ nlrule->action = frh->action;
+ nlrule->flags = frh->flags;
+ nlrule->table = frh_get_table(frh, tb);
if (tb[FRA_SUPPRESS_PREFIXLEN])
- rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
+ nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
else
- rule->suppress_prefixlen = -1;
+ nlrule->suppress_prefixlen = -1;
if (tb[FRA_SUPPRESS_IFGROUP])
- rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
+ nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
else
- rule->suppress_ifgroup = -1;
+ nlrule->suppress_ifgroup = -1;
if (tb[FRA_GOTO]) {
- if (rule->action != FR_ACT_GOTO)
+ if (nlrule->action != FR_ACT_GOTO) {
+ NL_SET_ERR_MSG(extack, "Unexpected goto");
goto errout_free;
+ }
- rule->target = nla_get_u32(tb[FRA_GOTO]);
+ nlrule->target = nla_get_u32(tb[FRA_GOTO]);
/* Backward jumps are prohibited to avoid endless loops */
- if (rule->target <= rule->pref)
+ if (nlrule->target <= nlrule->pref) {
+ NL_SET_ERR_MSG(extack, "Backward goto not supported");
goto errout_free;
-
- list_for_each_entry(r, &ops->rules_list, list) {
- if (r->pref == rule->target) {
- RCU_INIT_POINTER(rule->ctarget, r);
- break;
- }
}
-
- if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
- unresolved = 1;
- } else if (rule->action == FR_ACT_GOTO)
+ } else if (nlrule->action == FR_ACT_GOTO) {
+ NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
goto errout_free;
+ }
- if (rule->l3mdev && rule->table)
+ if (nlrule->l3mdev && nlrule->table) {
+ NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive");
goto errout_free;
+ }
if (tb[FRA_UID_RANGE]) {
if (current_user_ns() != net->user_ns) {
err = -EPERM;
+ NL_SET_ERR_MSG(extack, "No permission to set uid");
goto errout_free;
}
- rule->uid_range = nla_get_kuid_range(tb);
+ nlrule->uid_range = nla_get_kuid_range(tb);
- if (!uid_range_set(&rule->uid_range) ||
- !uid_lte(rule->uid_range.start, rule->uid_range.end))
+ if (!uid_range_set(&nlrule->uid_range) ||
+ !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) {
+ NL_SET_ERR_MSG(extack, "Invalid uid range");
goto errout_free;
+ }
} else {
- rule->uid_range = fib_kuid_range_unset;
+ nlrule->uid_range = fib_kuid_range_unset;
}
if (tb[FRA_IP_PROTO])
- rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+ nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
if (tb[FRA_SPORT_RANGE]) {
err = nla_get_port_range(tb[FRA_SPORT_RANGE],
- &rule->sport_range);
- if (err)
+ &nlrule->sport_range);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Invalid sport range");
goto errout_free;
+ }
}
if (tb[FRA_DPORT_RANGE]) {
err = nla_get_port_range(tb[FRA_DPORT_RANGE],
- &rule->dport_range);
- if (err)
+ &nlrule->dport_range);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Invalid dport range");
goto errout_free;
+ }
}
+ *rule = nlrule;
+
+ return 0;
+
+errout_free:
+ kfree(nlrule);
+errout:
+ return err;
+}
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rules_ops *ops = NULL;
+ struct fib_rule *rule = NULL, *r, *last = NULL;
+ struct nlattr *tb[FRA_MAX + 1];
+ int err = -EINVAL, unresolved = 0;
+ bool user_priority = false;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ NL_SET_ERR_MSG(extack, "Invalid msg length");
+ goto errout;
+ }
+
+ ops = lookup_rules_ops(net, frh->family);
+ if (!ops) {
+ err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Rule family not supported");
+ goto errout;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Error parsing msg");
+ goto errout;
+ }
+
+ err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
+ if (err)
+ goto errout;
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
- rule_exists(ops, frh, tb, rule)) {
+ rule_find(ops, frh, tb, rule, user_priority)) {
err = -EEXIST;
goto errout_free;
}
- err = ops->configure(rule, skb, frh, tb);
+ err = ops->configure(rule, skb, frh, tb, extack);
if (err < 0)
goto errout_free;
@@ -637,6 +694,16 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout_free;
list_for_each_entry(r, &ops->rules_list, list) {
+ if (r->pref == rule->target) {
+ RCU_INIT_POINTER(rule->ctarget, r);
+ break;
+ }
+ }
+
+ if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
+ unresolved = 1;
+
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
last = r;
@@ -690,171 +757,97 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
- struct fib_rule_port_range sprange = {0, 0};
- struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule, *r;
+ struct fib_rule *rule = NULL, *r, *nlrule = NULL;
struct nlattr *tb[FRA_MAX+1];
- struct fib_kuid_range range;
int err = -EINVAL;
+ bool user_priority = false;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ NL_SET_ERR_MSG(extack, "Invalid msg length");
goto errout;
+ }
ops = lookup_rules_ops(net, frh->family);
if (ops == NULL) {
err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Rule family not supported");
goto errout;
}
err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Error parsing msg");
goto errout;
+ }
- err = validate_rulemsg(frh, tb, ops);
- if (err < 0)
+ err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
+ if (err)
goto errout;
- if (tb[FRA_UID_RANGE]) {
- range = nla_get_kuid_range(tb);
- if (!uid_range_set(&range)) {
- err = -EINVAL;
- goto errout;
- }
- } else {
- range = fib_kuid_range_unset;
+ rule = rule_find(ops, frh, tb, nlrule, user_priority);
+ if (!rule) {
+ err = -ENOENT;
+ goto errout;
}
- if (tb[FRA_SPORT_RANGE]) {
- err = nla_get_port_range(tb[FRA_SPORT_RANGE],
- &sprange);
- if (err)
- goto errout;
+ if (rule->flags & FIB_RULE_PERMANENT) {
+ err = -EPERM;
+ goto errout;
}
- if (tb[FRA_DPORT_RANGE]) {
- err = nla_get_port_range(tb[FRA_DPORT_RANGE],
- &dprange);
+ if (ops->delete) {
+ err = ops->delete(rule);
if (err)
goto errout;
}
- list_for_each_entry(rule, &ops->rules_list, list) {
- if (tb[FRA_PROTOCOL] &&
- (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
- continue;
-
- if (frh->action && (frh->action != rule->action))
- continue;
-
- if (frh_get_table(frh, tb) &&
- (frh_get_table(frh, tb) != rule->table))
- continue;
-
- if (tb[FRA_PRIORITY] &&
- (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
- continue;
-
- if (tb[FRA_IIFNAME] &&
- nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
- continue;
-
- if (tb[FRA_OIFNAME] &&
- nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
- continue;
-
- if (tb[FRA_FWMARK] &&
- (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
- continue;
-
- if (tb[FRA_FWMASK] &&
- (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
- continue;
-
- if (tb[FRA_TUN_ID] &&
- (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
- continue;
-
- if (tb[FRA_L3MDEV] &&
- (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
- continue;
-
- if (uid_range_set(&range) &&
- (!uid_eq(rule->uid_range.start, range.start) ||
- !uid_eq(rule->uid_range.end, range.end)))
- continue;
-
- if (tb[FRA_IP_PROTO] &&
- (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
- continue;
-
- if (fib_rule_port_range_set(&sprange) &&
- !fib_rule_port_range_compare(&rule->sport_range, &sprange))
- continue;
-
- if (fib_rule_port_range_set(&dprange) &&
- !fib_rule_port_range_compare(&rule->dport_range, &dprange))
- continue;
-
- if (!ops->compare(rule, frh, tb))
- continue;
-
- if (rule->flags & FIB_RULE_PERMANENT) {
- err = -EPERM;
- goto errout;
- }
-
- if (ops->delete) {
- err = ops->delete(rule);
- if (err)
- goto errout;
- }
+ if (rule->tun_id)
+ ip_tunnel_unneed_metadata();
- if (rule->tun_id)
- ip_tunnel_unneed_metadata();
+ list_del_rcu(&rule->list);
- list_del_rcu(&rule->list);
-
- if (rule->action == FR_ACT_GOTO) {
- ops->nr_goto_rules--;
- if (rtnl_dereference(rule->ctarget) == NULL)
- ops->unresolved_rules--;
- }
+ if (rule->action == FR_ACT_GOTO) {
+ ops->nr_goto_rules--;
+ if (rtnl_dereference(rule->ctarget) == NULL)
+ ops->unresolved_rules--;
+ }
- /*
- * Check if this rule is a target to any of them. If so,
- * adjust to the next one with the same preference or
- * disable them. As this operation is eventually very
- * expensive, it is only performed if goto rules, except
- * current if it is goto rule, have actually been added.
- */
- if (ops->nr_goto_rules > 0) {
- struct fib_rule *n;
-
- n = list_next_entry(rule, list);
- if (&n->list == &ops->rules_list || n->pref != rule->pref)
- n = NULL;
- list_for_each_entry(r, &ops->rules_list, list) {
- if (rtnl_dereference(r->ctarget) != rule)
- continue;
- rcu_assign_pointer(r->ctarget, n);
- if (!n)
- ops->unresolved_rules++;
- }
+ /*
+ * Check if this rule is a target to any of them. If so,
+ * adjust to the next one with the same preference or
+ * disable them. As this operation is eventually very
+ * expensive, it is only performed if goto rules, except
+ * current if it is goto rule, have actually been added.
+ */
+ if (ops->nr_goto_rules > 0) {
+ struct fib_rule *n;
+
+ n = list_next_entry(rule, list);
+ if (&n->list == &ops->rules_list || n->pref != rule->pref)
+ n = NULL;
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (rtnl_dereference(r->ctarget) != rule)
+ continue;
+ rcu_assign_pointer(r->ctarget, n);
+ if (!n)
+ ops->unresolved_rules++;
}
-
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
- NULL);
- notify_rule_change(RTM_DELRULE, rule, ops, nlh,
- NETLINK_CB(skb).portid);
- fib_rule_put(rule);
- flush_route_cache(ops);
- rules_ops_put(ops);
- return 0;
}
- err = -ENOENT;
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
+ NULL);
+ notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+ NETLINK_CB(skb).portid);
+ fib_rule_put(rule);
+ flush_route_cache(ops);
+ rules_ops_put(ops);
+ kfree(nlrule);
+ return 0;
+
errout:
+ if (nlrule)
+ kfree(nlrule);
rules_ops_put(ops);
return err;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index e77c30ca491d..d3781daa26ab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -57,6 +57,7 @@
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
+#include <net/xfrm.h>
#include <linux/bpf_trace.h>
/**
@@ -2692,8 +2693,9 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
+ void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
unsigned long metalen = xdp_get_metalen(xdp);
- void *data_start = xdp->data_hard_start + metalen;
+ void *data_start = xdp_frame_end + metalen;
void *data = xdp->data + offset;
if (unlikely(data < data_start ||
@@ -2717,14 +2719,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
+{
+ void *data_end = xdp->data_end + offset;
+
+ /* only shrinking is allowed for now. */
+ if (unlikely(offset >= 0))
+ return -EINVAL;
+
+ if (unlikely(data_end < xdp->data + ETH_HLEN))
+ return -EINVAL;
+
+ xdp->data_end = data_end;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
+ .func = bpf_xdp_adjust_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
{
+ void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
void *meta = xdp->data_meta + offset;
unsigned long metalen = xdp->data - meta;
if (xdp_data_meta_unsupported(xdp))
return -ENOTSUPP;
- if (unlikely(meta < xdp->data_hard_start ||
+ if (unlikely(meta < xdp_frame_end ||
meta > xdp->data))
return -EINVAL;
if (unlikely((metalen & (sizeof(__u32) - 1)) ||
@@ -2749,13 +2776,18 @@ static int __bpf_tx_xdp(struct net_device *dev,
struct xdp_buff *xdp,
u32 index)
{
+ struct xdp_frame *xdpf;
int err;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
}
- err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
+ return -EOVERFLOW;
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
if (err)
return err;
dev->netdev_ops->ndo_xdp_flush(dev);
@@ -2771,11 +2803,19 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
struct net_device *dev = fwd;
+ struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
- err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
+ return -EOVERFLOW;
+
+ /* TODO: move to inside map code instead, for bulk support
+ * err = dev_map_enqueue(dev, xdp);
+ */
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
if (err)
return err;
__dev_map_insert_ctx(map, index);
@@ -3053,7 +3093,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
func == bpf_xdp_adjust_meta ||
- func == bpf_msg_pull_data)
+ func == bpf_msg_pull_data ||
+ func == bpf_xdp_adjust_tail)
return true;
return false;
@@ -3704,6 +3745,49 @@ static const struct bpf_func_proto bpf_bind_proto = {
.arg3_type = ARG_CONST_SIZE,
};
+#ifdef CONFIG_XFRM
+BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
+ struct bpf_xfrm_state *, to, u32, size, u64, flags)
+{
+ const struct sec_path *sp = skb_sec_path(skb);
+ const struct xfrm_state *x;
+
+ if (!sp || unlikely(index >= sp->len || flags))
+ goto err_clear;
+
+ x = sp->xvec[index];
+
+ if (unlikely(size != sizeof(struct bpf_xfrm_state)))
+ goto err_clear;
+
+ to->reqid = x->props.reqid;
+ to->spi = x->id.spi;
+ to->family = x->props.family;
+ if (to->family == AF_INET6) {
+ memcpy(to->remote_ipv6, x->props.saddr.a6,
+ sizeof(to->remote_ipv6));
+ } else {
+ to->remote_ipv4 = x->props.saddr.a4;
+ }
+
+ return 0;
+err_clear:
+ memset(to, 0, size);
+ return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
+ .func = bpf_skb_get_xfrm_state,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_ANYTHING,
+};
+#endif
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3845,6 +3929,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
+#ifdef CONFIG_XFRM
+ case BPF_FUNC_skb_get_xfrm_state:
+ return &bpf_skb_get_xfrm_state_proto;
+#endif
default:
return bpf_base_func_proto(func_id);
}
@@ -3868,6 +3956,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_proto;
case BPF_FUNC_redirect_map:
return &bpf_xdp_redirect_map_proto;
+ case BPF_FUNC_xdp_adjust_tail:
+ return &bpf_xdp_adjust_tail_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ce519861be59..5afae29367c1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -820,7 +820,8 @@ static void neigh_periodic_work(struct work_struct *work)
write_lock(&n->lock);
state = n->nud_state;
- if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+ if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
+ (n->flags & NTF_EXT_LEARNED)) {
write_unlock(&n->lock);
goto next_elt;
}
@@ -1136,6 +1137,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (neigh->dead)
goto out;
+ neigh_update_ext_learned(neigh, flags, &notify);
+
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
@@ -1781,6 +1784,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
flags &= ~NEIGH_UPDATE_F_OVERRIDE;
}
+ if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ flags |= NEIGH_UPDATE_F_EXT_LEARNED;
+
if (ndm->ndm_flags & NTF_USE) {
neigh_event_send(neigh, NULL);
err = 0;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
new file mode 100644
index 000000000000..68bf07206744
--- /dev/null
+++ b/net/core/page_pool.c
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool.c
+ * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include <net/page_pool.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h> /* for __put_page() */
+
+static int page_pool_init(struct page_pool *pool,
+ const struct page_pool_params *params)
+{
+ unsigned int ring_qsize = 1024; /* Default */
+
+ memcpy(&pool->p, params, sizeof(pool->p));
+
+ /* Validate only known flags were used */
+ if (pool->p.flags & ~(PP_FLAG_ALL))
+ return -EINVAL;
+
+ if (pool->p.pool_size)
+ ring_qsize = pool->p.pool_size;
+
+ /* Sanity limit mem that can be pinned down */
+ if (ring_qsize > 32768)
+ return -E2BIG;
+
+ /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
+ * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
+ * which is the XDP_TX use-case.
+ */
+ if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
+ (pool->p.dma_dir != DMA_BIDIRECTIONAL))
+ return -EINVAL;
+
+ if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+ struct page_pool *pool;
+ int err = 0;
+
+ pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+
+ err = page_pool_init(pool, params);
+ if (err < 0) {
+ pr_warn("%s() gave up with errno %d\n", __func__, err);
+ kfree(pool);
+ return ERR_PTR(err);
+ }
+ return pool;
+}
+EXPORT_SYMBOL(page_pool_create);
+
+/* fast path */
+static struct page *__page_pool_get_cached(struct page_pool *pool)
+{
+ struct ptr_ring *r = &pool->ring;
+ struct page *page;
+
+ /* Quicker fallback, avoid locks when ring is empty */
+ if (__ptr_ring_empty(r))
+ return NULL;
+
+ /* Test for safe-context, caller should provide this guarantee */
+ if (likely(in_serving_softirq())) {
+ if (likely(pool->alloc.count)) {
+ /* Fast-path */
+ page = pool->alloc.cache[--pool->alloc.count];
+ return page;
+ }
+ /* Slower-path: Alloc array empty, time to refill
+ *
+ * Open-coded bulk ptr_ring consumer.
+ *
+ * Discussion: the ring consumer lock is not really
+ * needed due to the softirq/NAPI protection, but
+ * later need the ability to reclaim pages on the
+ * ring. Thus, keeping the locks.
+ */
+ spin_lock(&r->consumer_lock);
+ while ((page = __ptr_ring_consume(r))) {
+ if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
+ break;
+ pool->alloc.cache[pool->alloc.count++] = page;
+ }
+ spin_unlock(&r->consumer_lock);
+ return page;
+ }
+
+ /* Slow-path: Get page from locked ring queue */
+ page = ptr_ring_consume(&pool->ring);
+ return page;
+}
+
+/* slow path */
+noinline
+static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
+ gfp_t _gfp)
+{
+ struct page *page;
+ gfp_t gfp = _gfp;
+ dma_addr_t dma;
+
+ /* We could always set __GFP_COMP, and avoid this branch, as
+ * prep_new_page() can handle order-0 with __GFP_COMP.
+ */
+ if (pool->p.order)
+ gfp |= __GFP_COMP;
+
+ /* FUTURE development:
+ *
+ * Current slow-path essentially falls back to single page
+ * allocations, which doesn't improve performance. This code
+ * need bulk allocation support from the page allocator code.
+ */
+
+ /* Cache was empty, do real allocation */
+ page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+ if (!page)
+ return NULL;
+
+ if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ goto skip_dma_map;
+
+ /* Setup DMA mapping: use page->private for DMA-addr
+ * This mapping is kept for lifetime of page, until leaving pool.
+ */
+ dma = dma_map_page(pool->p.dev, page, 0,
+ (PAGE_SIZE << pool->p.order),
+ pool->p.dma_dir);
+ if (dma_mapping_error(pool->p.dev, dma)) {
+ put_page(page);
+ return NULL;
+ }
+ set_page_private(page, dma); /* page->private = dma; */
+
+skip_dma_map:
+ /* When page just alloc'ed is should/must have refcnt 1. */
+ return page;
+}
+
+/* For using page_pool replace: alloc_pages() API calls, but provide
+ * synchronization guarantee for allocation side.
+ */
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+{
+ struct page *page;
+
+ /* Fast-path: Get a page from cache */
+ page = __page_pool_get_cached(pool);
+ if (page)
+ return page;
+
+ /* Slow-path: cache empty, do real allocation */
+ page = __page_pool_alloc_pages_slow(pool, gfp);
+ return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_pages);
+
+/* Cleanup page_pool state from page */
+static void __page_pool_clean_page(struct page_pool *pool,
+ struct page *page)
+{
+ if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ return;
+
+ /* DMA unmap */
+ dma_unmap_page(pool->p.dev, page_private(page),
+ PAGE_SIZE << pool->p.order, pool->p.dma_dir);
+ set_page_private(page, 0);
+}
+
+/* Return a page to the page allocator, cleaning up our state */
+static void __page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+ __page_pool_clean_page(pool, page);
+ put_page(page);
+ /* An optimization would be to call __free_pages(page, pool->p.order)
+ * knowing page is not part of page-cache (thus avoiding a
+ * __page_cache_release() call).
+ */
+}
+
+static bool __page_pool_recycle_into_ring(struct page_pool *pool,
+ struct page *page)
+{
+ int ret;
+ /* BH protection not needed if current is serving softirq */
+ if (in_serving_softirq())
+ ret = ptr_ring_produce(&pool->ring, page);
+ else
+ ret = ptr_ring_produce_bh(&pool->ring, page);
+
+ return (ret == 0) ? true : false;
+}
+
+/* Only allow direct recycling in special circumstances, into the
+ * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case.
+ *
+ * Caller must provide appropriate safe context.
+ */
+static bool __page_pool_recycle_direct(struct page *page,
+ struct page_pool *pool)
+{
+ if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
+ return false;
+
+ /* Caller MUST have verified/know (page_ref_count(page) == 1) */
+ pool->alloc.cache[pool->alloc.count++] = page;
+ return true;
+}
+
+void __page_pool_put_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
+{
+ /* This allocator is optimized for the XDP mode that uses
+ * one-frame-per-page, but have fallbacks that act like the
+ * regular page allocator APIs.
+ *
+ * refcnt == 1 means page_pool owns page, and can recycle it.
+ */
+ if (likely(page_ref_count(page) == 1)) {
+ /* Read barrier done in page_ref_count / READ_ONCE */
+
+ if (allow_direct && in_serving_softirq())
+ if (__page_pool_recycle_direct(page, pool))
+ return;
+
+ if (!__page_pool_recycle_into_ring(pool, page)) {
+ /* Cache full, fallback to free pages */
+ __page_pool_return_page(pool, page);
+ }
+ return;
+ }
+ /* Fallback/non-XDP mode: API user have elevated refcnt.
+ *
+ * Many drivers split up the page into fragments, and some
+ * want to keep doing this to save memory and do refcnt based
+ * recycling. Support this use case too, to ease drivers
+ * switching between XDP/non-XDP.
+ *
+ * In-case page_pool maintains the DMA mapping, API user must
+ * call page_pool_put_page once. In this elevated refcnt
+ * case, the DMA is unmapped/released, as driver is likely
+ * doing refcnt based recycle tricks, meaning another process
+ * will be invoking put_page.
+ */
+ __page_pool_clean_page(pool, page);
+ put_page(page);
+}
+EXPORT_SYMBOL(__page_pool_put_page);
+
+static void __page_pool_empty_ring(struct page_pool *pool)
+{
+ struct page *page;
+
+ /* Empty recycle ring */
+ while ((page = ptr_ring_consume(&pool->ring))) {
+ /* Verify the refcnt invariant of cached pages */
+ if (!(page_ref_count(page) == 1))
+ pr_crit("%s() page_pool refcnt %d violation\n",
+ __func__, page_ref_count(page));
+
+ __page_pool_return_page(pool, page);
+ }
+}
+
+static void __page_pool_destroy_rcu(struct rcu_head *rcu)
+{
+ struct page_pool *pool;
+
+ pool = container_of(rcu, struct page_pool, rcu);
+
+ WARN(pool->alloc.count, "API usage violation");
+
+ __page_pool_empty_ring(pool);
+ ptr_ring_cleanup(&pool->ring, NULL);
+ kfree(pool);
+}
+
+/* Cleanup and release resources */
+void page_pool_destroy(struct page_pool *pool)
+{
+ struct page *page;
+
+ /* Empty alloc cache, assume caller made sure this is
+ * no-longer in use, and page_pool_alloc_pages() cannot be
+ * call concurrently.
+ */
+ while (pool->alloc.count) {
+ page = pool->alloc.cache[--pool->alloc.count];
+ __page_pool_return_page(pool, page);
+ }
+
+ /* No more consumers should exist, but producers could still
+ * be in-flight.
+ */
+ __page_pool_empty_ring(pool);
+
+ /* An xdp_mem_allocator can still ref page_pool pointer */
+ call_rcu(&pool->rcu, __page_pool_destroy_rcu);
+}
+EXPORT_SYMBOL(page_pool_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 45936922d7e2..80802546c279 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -785,13 +785,15 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
long expires, u32 error)
{
struct rta_cacheinfo ci = {
- .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
- .rta_used = dst->__use,
- .rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
};
+ if (dst) {
+ ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
+ ci.rta_used = dst->__use;
+ ci.rta_clntref = atomic_read(&dst->__refcnt);
+ }
if (expires) {
unsigned long clock;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 345b51837ca8..c642304f178c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1305,7 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
skb->inner_mac_header += off;
}
-static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
{
__copy_skb_header(new, old);
@@ -1313,6 +1313,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}
+EXPORT_SYMBOL(skb_copy_header);
static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
{
@@ -1355,7 +1356,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
- copy_skb_header(n, skb);
+ skb_copy_header(n, skb);
return n;
}
EXPORT_SYMBOL(skb_copy);
@@ -1419,7 +1420,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
skb_clone_fraglist(n);
}
- copy_skb_header(n, skb);
+ skb_copy_header(n, skb);
out:
return n;
}
@@ -1599,7 +1600,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
skb->len + head_copy_len));
- copy_skb_header(n, skb);
+ skb_copy_header(n, skb);
skb_headers_offset_update(n, newheadroom - oldheadroom);
@@ -1839,6 +1840,20 @@ done:
}
EXPORT_SYMBOL(___pskb_trim);
+/* Note : use pskb_trim_rcsum() instead of calling this directly
+ */
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ int delta = skb->len - len;
+
+ skb->csum = csum_sub(skb->csum,
+ skb_checksum(skb, len, delta, 0));
+ }
+ return __pskb_trim(skb, len);
+}
+EXPORT_SYMBOL(pskb_trim_rcsum_slow);
+
/**
* __pskb_pull_tail - advance tail of skb header
* @skb: buffer to reallocate
@@ -4926,6 +4941,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen = tcp_hdrlen(skb);
} else if (unlikely(skb_is_gso_sctp(skb))) {
thlen = sizeof(struct sctphdr);
+ } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
+ thlen = sizeof(struct udphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
diff --git a/net/core/sock.c b/net/core/sock.c
index 6444525f610c..b2c3db169ca1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -905,7 +905,10 @@ set_rcvbuf:
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
- sk->sk_rcvlowat = val ? : 1;
+ if (sock->ops->set_rcvlowat)
+ ret = sock->ops->set_rcvlowat(sk, val);
+ else
+ sk->sk_rcvlowat = val ? : 1;
break;
case SO_RCVTIMEO:
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 097a0f74e004..0c86b53a3a63 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -5,6 +5,10 @@
*/
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/rhashtable.h>
+#include <net/page_pool.h>
#include <net/xdp.h>
@@ -13,6 +17,104 @@
#define REG_STATE_UNREGISTERED 0x2
#define REG_STATE_UNUSED 0x3
+static DEFINE_IDA(mem_id_pool);
+static DEFINE_MUTEX(mem_id_lock);
+#define MEM_ID_MAX 0xFFFE
+#define MEM_ID_MIN 1
+static int mem_id_next = MEM_ID_MIN;
+
+static bool mem_id_init; /* false */
+static struct rhashtable *mem_id_ht;
+
+struct xdp_mem_allocator {
+ struct xdp_mem_info mem;
+ union {
+ void *allocator;
+ struct page_pool *page_pool;
+ };
+ struct rhash_head node;
+ struct rcu_head rcu;
+};
+
+static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
+{
+ const u32 *k = data;
+ const u32 key = *k;
+
+ BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+ != sizeof(u32));
+
+ /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
+ return key << RHT_HASH_RESERVED_SPACE;
+}
+
+static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct xdp_mem_allocator *xa = ptr;
+ u32 mem_id = *(u32 *)arg->key;
+
+ return xa->mem.id != mem_id;
+}
+
+static const struct rhashtable_params mem_id_rht_params = {
+ .nelem_hint = 64,
+ .head_offset = offsetof(struct xdp_mem_allocator, node),
+ .key_offset = offsetof(struct xdp_mem_allocator, mem.id),
+ .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+ .max_size = MEM_ID_MAX,
+ .min_size = 8,
+ .automatic_shrinking = true,
+ .hashfn = xdp_mem_id_hashfn,
+ .obj_cmpfn = xdp_mem_id_cmp,
+};
+
+static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
+{
+ struct xdp_mem_allocator *xa;
+
+ xa = container_of(rcu, struct xdp_mem_allocator, rcu);
+
+ /* Allow this ID to be reused */
+ ida_simple_remove(&mem_id_pool, xa->mem.id);
+
+ /* Notice, driver is expected to free the *allocator,
+ * e.g. page_pool, and MUST also use RCU free.
+ */
+
+ /* Poison memory */
+ xa->mem.id = 0xFFFF;
+ xa->mem.type = 0xF0F0;
+ xa->allocator = (void *)0xDEAD9001;
+
+ kfree(xa);
+}
+
+static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+ struct xdp_mem_allocator *xa;
+ int id = xdp_rxq->mem.id;
+ int err;
+
+ if (id == 0)
+ return;
+
+ mutex_lock(&mem_id_lock);
+
+ xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+ if (!xa) {
+ mutex_unlock(&mem_id_lock);
+ return;
+ }
+
+ err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
+ WARN_ON(err);
+
+ call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+
+ mutex_unlock(&mem_id_lock);
+}
+
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -21,8 +123,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
+ __xdp_rxq_info_unreg_mem_model(xdp_rxq);
+
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;
+
+ /* Reset mem info to defaults */
+ xdp_rxq->mem.id = 0;
+ xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
@@ -71,3 +179,164 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
+
+static int __mem_id_init_hash_table(void)
+{
+ struct rhashtable *rht;
+ int ret;
+
+ if (unlikely(mem_id_init))
+ return 0;
+
+ rht = kzalloc(sizeof(*rht), GFP_KERNEL);
+ if (!rht)
+ return -ENOMEM;
+
+ ret = rhashtable_init(rht, &mem_id_rht_params);
+ if (ret < 0) {
+ kfree(rht);
+ return ret;
+ }
+ mem_id_ht = rht;
+ smp_mb(); /* mutex lock should provide enough pairing */
+ mem_id_init = true;
+
+ return 0;
+}
+
+/* Allocate a cyclic ID that maps to allocator pointer.
+ * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
+ *
+ * Caller must lock mem_id_lock.
+ */
+static int __mem_id_cyclic_get(gfp_t gfp)
+{
+ int retries = 1;
+ int id;
+
+again:
+ id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+ if (id < 0) {
+ if (id == -ENOSPC) {
+ /* Cyclic allocator, reset next id */
+ if (retries--) {
+ mem_id_next = MEM_ID_MIN;
+ goto again;
+ }
+ }
+ return id; /* errno */
+ }
+ mem_id_next = id + 1;
+
+ return id;
+}
+
+static bool __is_supported_mem_type(enum xdp_mem_type type)
+{
+ if (type == MEM_TYPE_PAGE_POOL)
+ return is_page_pool_compiled_in();
+
+ if (type >= MEM_TYPE_MAX)
+ return false;
+
+ return true;
+}
+
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ enum xdp_mem_type type, void *allocator)
+{
+ struct xdp_mem_allocator *xdp_alloc;
+ gfp_t gfp = GFP_KERNEL;
+ int id, errno, ret;
+ void *ptr;
+
+ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+ WARN(1, "Missing register, driver bug");
+ return -EFAULT;
+ }
+
+ if (!__is_supported_mem_type(type))
+ return -EOPNOTSUPP;
+
+ xdp_rxq->mem.type = type;
+
+ if (!allocator) {
+ if (type == MEM_TYPE_PAGE_POOL)
+ return -EINVAL; /* Setup time check page_pool req */
+ return 0;
+ }
+
+ /* Delay init of rhashtable to save memory if feature isn't used */
+ if (!mem_id_init) {
+ mutex_lock(&mem_id_lock);
+ ret = __mem_id_init_hash_table();
+ mutex_unlock(&mem_id_lock);
+ if (ret < 0) {
+ WARN_ON(1);
+ return ret;
+ }
+ }
+
+ xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
+ if (!xdp_alloc)
+ return -ENOMEM;
+
+ mutex_lock(&mem_id_lock);
+ id = __mem_id_cyclic_get(gfp);
+ if (id < 0) {
+ errno = id;
+ goto err;
+ }
+ xdp_rxq->mem.id = id;
+ xdp_alloc->mem = xdp_rxq->mem;
+ xdp_alloc->allocator = allocator;
+
+ /* Insert allocator into ID lookup table */
+ ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
+ if (IS_ERR(ptr)) {
+ errno = PTR_ERR(ptr);
+ goto err;
+ }
+
+ mutex_unlock(&mem_id_lock);
+
+ return 0;
+err:
+ mutex_unlock(&mem_id_lock);
+ kfree(xdp_alloc);
+ return errno;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+
+void xdp_return_frame(struct xdp_frame *xdpf)
+{
+ struct xdp_mem_info *mem = &xdpf->mem;
+ struct xdp_mem_allocator *xa;
+ void *data = xdpf->data;
+ struct page *page;
+
+ switch (mem->type) {
+ case MEM_TYPE_PAGE_POOL:
+ rcu_read_lock();
+ /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
+ xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+ page = virt_to_head_page(data);
+ if (xa)
+ page_pool_put_page(xa->page_pool, page);
+ else
+ put_page(page);
+ rcu_read_unlock();
+ break;
+ case MEM_TYPE_PAGE_SHARED:
+ page_frag_free(data);
+ break;
+ case MEM_TYPE_PAGE_ORDER0:
+ page = virt_to_page(data); /* Assumes order0 page*/
+ put_page(page);
+ break;
+ default:
+ /* Not possible, checked in xdp_rxq_info_reg_mem_model() */
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame);
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index c795c3f509c9..72236695db3d 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -121,13 +121,16 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
int err = -EINVAL;
struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- if (frh->tos)
+ if (frh->tos) {
+ NL_SET_ERR_MSG(extack, "Invalid tos value");
goto errout;
+ }
if (rule->table == RT_TABLE_UNSPEC) {
if (rule->action == FR_ACT_TO_TBL) {
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 90e6df0351eb..c90ee3227dea 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -22,7 +22,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev,
int port = cpu_dp->index;
int count = 0;
- if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
+ if (ops->get_sset_count && ops->get_ethtool_stats) {
count = ops->get_sset_count(dev, ETH_SS_STATS);
ops->get_ethtool_stats(dev, stats, data);
}
@@ -31,6 +31,32 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev,
ds->ops->get_ethtool_stats(ds, port, data + count);
}
+static void dsa_master_get_ethtool_phy_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int count = 0;
+
+ if (dev->phydev && !ops->get_ethtool_phy_stats) {
+ count = phy_ethtool_get_sset_count(dev->phydev);
+ if (count >= 0)
+ phy_ethtool_get_stats(dev->phydev, stats, data);
+ } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) {
+ count = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+ ops->get_ethtool_phy_stats(dev, stats, data);
+ }
+
+ if (count < 0)
+ count = 0;
+
+ if (ds->ops->get_ethtool_phy_stats)
+ ds->ops->get_ethtool_phy_stats(ds, port, data + count);
+}
+
static int dsa_master_get_sset_count(struct net_device *dev, int sset)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -38,11 +64,17 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
struct dsa_switch *ds = cpu_dp->ds;
int count = 0;
- if (ops && ops->get_sset_count)
- count += ops->get_sset_count(dev, sset);
+ if (sset == ETH_SS_PHY_STATS && dev->phydev &&
+ !ops->get_ethtool_phy_stats)
+ count = phy_ethtool_get_sset_count(dev->phydev);
+ else if (ops->get_sset_count)
+ count = ops->get_sset_count(dev, sset);
+
+ if (count < 0)
+ count = 0;
- if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds, cpu_dp->index);
+ if (ds->ops->get_sset_count)
+ count += ds->ops->get_sset_count(ds, cpu_dp->index, sset);
return count;
}
@@ -64,19 +96,28 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
/* We do not want to be NULL-terminated, since this is a prefix */
pfx[sizeof(pfx) - 1] = '_';
- if (ops && ops->get_sset_count && ops->get_strings) {
- mcount = ops->get_sset_count(dev, ETH_SS_STATS);
+ if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
+ !ops->get_ethtool_phy_stats) {
+ mcount = phy_ethtool_get_sset_count(dev->phydev);
+ if (mcount < 0)
+ mcount = 0;
+ else
+ phy_ethtool_get_strings(dev->phydev, data);
+ } else if (ops->get_sset_count && ops->get_strings) {
+ mcount = ops->get_sset_count(dev, stringset);
+ if (mcount < 0)
+ mcount = 0;
ops->get_strings(dev, stringset, data);
}
- if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
+ if (ds->ops->get_strings) {
ndata = data + mcount * len;
/* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
* the output after to prepend our CPU port prefix we
* constructed earlier
*/
- ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds, port);
+ ds->ops->get_strings(ds, port, stringset, ndata);
+ count = ds->ops->get_sset_count(ds, port, stringset);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
@@ -102,6 +143,7 @@ static int dsa_master_ethtool_setup(struct net_device *dev)
ops->get_sset_count = dsa_master_get_sset_count;
ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
ops->get_strings = dsa_master_get_strings;
+ ops->get_ethtool_phy_stats = dsa_master_get_ethtool_phy_stats;
dev->ethtool_ops = ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 7acc1169d75e..2413beb995be 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -273,25 +273,38 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return 0;
}
-static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
{
- struct device_node *port_dn = dp->dn;
struct device_node *phy_dn;
- struct dsa_switch *ds = dp->ds;
struct phy_device *phydev;
- int port = dp->index;
- int err = 0;
- phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+ phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
if (!phy_dn)
- return 0;
+ return NULL;
phydev = of_phy_find_device(phy_dn);
if (!phydev) {
- err = -EPROBE_DEFER;
- goto err_put_of;
+ of_node_put(phy_dn);
+ return ERR_PTR(-EPROBE_DEFER);
}
+ return phydev;
+}
+
+static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int err = 0;
+
+ phydev = dsa_port_get_phy_device(dp);
+ if (!phydev)
+ return 0;
+
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
if (enable) {
err = genphy_config_init(phydev);
if (err < 0)
@@ -317,8 +330,6 @@ static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
err_put_dev:
put_device(&phydev->mdio.dev);
-err_put_of:
- of_node_put(phy_dn);
return err;
}
@@ -372,3 +383,60 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
else
dsa_port_setup_phy_of(dp, false);
}
+
+int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data)
+{
+ struct phy_device *phydev;
+ int ret = -EOPNOTSUPP;
+
+ if (of_phy_is_fixed_link(dp->dn))
+ return ret;
+
+ phydev = dsa_port_get_phy_device(dp);
+ if (IS_ERR_OR_NULL(phydev))
+ return ret;
+
+ ret = phy_ethtool_get_strings(phydev, data);
+ put_device(&phydev->mdio.dev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings);
+
+int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data)
+{
+ struct phy_device *phydev;
+ int ret = -EOPNOTSUPP;
+
+ if (of_phy_is_fixed_link(dp->dn))
+ return ret;
+
+ phydev = dsa_port_get_phy_device(dp);
+ if (IS_ERR_OR_NULL(phydev))
+ return ret;
+
+ ret = phy_ethtool_get_stats(phydev, NULL, data);
+ put_device(&phydev->mdio.dev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats);
+
+int dsa_port_get_phy_sset_count(struct dsa_port *dp)
+{
+ struct phy_device *phydev;
+ int ret = -EOPNOTSUPP;
+
+ if (of_phy_is_fixed_link(dp->dn))
+ return ret;
+
+ phydev = dsa_port_get_phy_device(dp);
+ if (IS_ERR_OR_NULL(phydev))
+ return ret;
+
+ ret = phy_ethtool_get_sset_count(phydev);
+ put_device(&phydev->mdio.dev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 18561af7a8f1..c287f1ef964c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -560,7 +560,8 @@ static void dsa_slave_get_strings(struct net_device *dev,
strncpy(data + 2 * len, "rx_packets", len);
strncpy(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
- ds->ops->get_strings(ds, dp->index, data + 4 * len);
+ ds->ops->get_strings(ds, dp->index, stringset,
+ data + 4 * len);
}
}
@@ -605,7 +606,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds, dp->index);
+ count += ds->ops->get_sset_count(ds, dp->index, sset);
return count;
}
@@ -1440,6 +1441,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_fdb_info *fdb_info = ptr;
struct dsa_switchdev_event_work *switchdev_work;
if (!dsa_slave_dev_check(dev))
@@ -1457,8 +1459,10 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
+ break;
if (dsa_slave_switchdev_fdb_work_init(switchdev_work,
- ptr))
+ fdb_info))
goto err_fdb_work_init;
dev_hold(dev);
break;
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a07b7dd06def..b379520f9133 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,7 +13,8 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
- inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
+ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
+ metrics.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaed0367e669..b403499fdabe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -994,7 +994,9 @@ const struct proto_ops inet_stream_ops = {
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
- .mmap = sock_no_mmap,
+#ifdef CONFIG_MMU
+ .mmap = tcp_mmap,
+#endif
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
@@ -1006,6 +1008,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
+ .set_rcvlowat = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 737d11bc8838..f8eb78d042a4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- if (frh->tos & ~IPTOS_TOS_MASK)
+ if (frh->tos & ~IPTOS_TOS_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
+ }
/* split local/main if they are not already split */
err = fib_unmerge(net);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c27122f01b87..6608db23f54b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1019,47 +1019,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
- bool ecn_ca = false;
- struct nlattr *nla;
- int remaining;
-
- if (!cfg->fc_mx)
- return 0;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
- u32 val;
-
- if (!type)
- continue;
- if (type > RTAX_MAX)
- return -EINVAL;
-
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
-
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
- if (val == TCP_CA_UNSPEC)
- return -EINVAL;
- } else {
- val = nla_get_u32(nla);
- }
- if (type == RTAX_ADVMSS && val > 65535 - 40)
- val = 65535 - 40;
- if (type == RTAX_MTU && val > 65535 - 15)
- val = 65535 - 15;
- if (type == RTAX_HOPLIMIT && val > 255)
- val = 255;
- if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
- return -EINVAL;
- fi->fib_metrics->metrics[type - 1] = val;
- }
-
- if (ecn_ca)
- fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
-
- return 0;
+ return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
+ fi->fib_metrics->metrics);
}
struct fib_info *fib_create_info(struct fib_config *cfg,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9c169bb2444d..dfe5b22f6ed4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -578,6 +578,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
int tunnel_hlen;
int version;
__be16 df;
+ int nhoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -605,6 +606,11 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
truncate = true;
}
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 83c73bab2c3d..f2338e40c37d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk,
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
+ bool paged;
skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0;
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+ paged = !!cork->gso_size;
+
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -906,7 +909,7 @@ static int __ip_append_data(struct sock *sk,
if (transhdrlen &&
length + fragheaderlen <= mtu &&
rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
- !(flags & MSG_MORE) &&
+ (!(flags & MSG_MORE) || cork->gso_size) &&
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
@@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -953,8 +957,12 @@ alloc_new_skb:
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
+ else if (!paged)
alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += exthdrlen;
@@ -998,7 +1006,7 @@ alloc_new_skb:
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, fraglen + exthdrlen - pagedlen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1014,7 +1022,7 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1022,7 +1030,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1135,6 +1143,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
*rtp = NULL;
cork->fragsize = ip_sk_use_pmtu(sk) ?
dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+
+ cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
cork->dst = &rt->dst;
cork->length = 0;
cork->ttl = ipc->ttl;
@@ -1214,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EOPNOTSUPP;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -1470,9 +1480,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable **rtp,
- unsigned int flags)
+ struct inet_cork *cork, unsigned int flags)
{
- struct inet_cork cork;
struct sk_buff_head queue;
int err;
@@ -1481,22 +1490,22 @@ struct sk_buff *ip_make_skb(struct sock *sk,
__skb_queue_head_init(&queue);
- cork.flags = 0;
- cork.addr = 0;
- cork.opt = NULL;
- err = ip_setup_cork(sk, &cork, ipc, rtp);
+ cork->flags = 0;
+ cork->addr = 0;
+ cork->opt = NULL;
+ err = ip_setup_cork(sk, cork, ipc, rtp);
if (err)
return ERR_PTR(err);
- err = __ip_append_data(sk, fl4, &queue, &cork,
+ err = __ip_append_data(sk, fl4, &queue, cork,
&current->task_frag, getfrag,
from, length, transhdrlen, flags);
if (err) {
- __ip_flush_pending_frames(sk, &queue, &cork);
+ __ip_flush_pending_frames(sk, &queue, cork);
return ERR_PTR(err);
}
- return __ip_make_skb(sk, fl4, &queue, &cork);
+ return __ip_make_skb(sk, fl4, &queue, cork);
}
/*
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 43f620feb1c4..d839d74853fc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -28,6 +28,9 @@
*
* Multiple Nameservers in /proc/net/pnp
* -- Josef Siemes <jsiemes@web.de>, Aug 2002
+ *
+ * NTP servers in /proc/net/ipconfig/ntp_servers
+ * -- Chris Novakovic <chris@chrisn.me.uk>, April 2018
*/
#include <linux/types.h>
@@ -93,6 +96,7 @@
#define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */
#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers
- '3' from resolv.h */
+#define CONF_NTP_SERVERS_MAX 3 /* Maximum number of NTP servers */
#define NONE cpu_to_be32(INADDR_NONE)
#define ANY cpu_to_be32(INADDR_ANY)
@@ -152,12 +156,16 @@ static int ic_proto_used; /* Protocol used, if any */
#define ic_proto_used 0
#endif
static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
+static __be32 ic_ntp_servers[CONF_NTP_SERVERS_MAX]; /* NTP server IP addresses */
static u8 ic_domain[64]; /* DNS (not NIS) domain name */
/*
* Private state.
*/
+/* proc_dir_entry for /proc/net/ipconfig */
+static struct proc_dir_entry *ipconfig_dir;
+
/* Name of user-selected boot device */
static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
@@ -576,6 +584,15 @@ static inline void __init ic_nameservers_predef(void)
ic_nameservers[i] = NONE;
}
+/* Predefine NTP servers */
+static inline void __init ic_ntp_servers_predef(void)
+{
+ int i;
+
+ for (i = 0; i < CONF_NTP_SERVERS_MAX; i++)
+ ic_ntp_servers[i] = NONE;
+}
+
/*
* DHCP/BOOTP support.
*/
@@ -671,6 +688,7 @@ ic_dhcp_init_options(u8 *options, struct ic_device *d)
17, /* Boot path */
26, /* MTU */
40, /* NIS domain name */
+ 42, /* NTP servers */
};
*e++ = 55; /* Parameter request list */
@@ -721,9 +739,11 @@ static void __init ic_bootp_init_ext(u8 *e)
*e++ = 3; /* Default gateway request */
*e++ = 4;
e += 4;
- *e++ = 5; /* Name server request */
- *e++ = 8;
- e += 8;
+#if CONF_NAMESERVERS_MAX > 0
+ *e++ = 6; /* (DNS) name server request */
+ *e++ = 4 * CONF_NAMESERVERS_MAX;
+ e += 4 * CONF_NAMESERVERS_MAX;
+#endif
*e++ = 12; /* Host name request */
*e++ = 32;
e += 32;
@@ -748,7 +768,13 @@ static void __init ic_bootp_init_ext(u8 *e)
*/
static inline void __init ic_bootp_init(void)
{
+ /* Re-initialise all name servers and NTP servers to NONE, in case any
+ * were set via the "ip=" or "nfsaddrs=" kernel command line parameters:
+ * any IP addresses specified there will already have been decoded but
+ * are no longer needed
+ */
ic_nameservers_predef();
+ ic_ntp_servers_predef();
dev_add_pack(&bootp_packet_type);
}
@@ -912,6 +938,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
ic_bootp_string(utsname()->domainname, ext+1, *ext,
__NEW_UTS_LEN);
break;
+ case 42: /* NTP servers */
+ servers = *ext / 4;
+ if (servers > CONF_NTP_SERVERS_MAX)
+ servers = CONF_NTP_SERVERS_MAX;
+ for (i = 0; i < servers; i++) {
+ if (ic_ntp_servers[i] == NONE)
+ memcpy(&ic_ntp_servers[i], ext+1+4*i, 4);
+ }
+ break;
}
}
@@ -1258,6 +1293,7 @@ static int __init ic_dynamic(void)
#ifdef CONFIG_PROC_FS
+/* Name servers: */
static int pnp_seq_show(struct seq_file *seq, void *v)
{
int i;
@@ -1294,6 +1330,62 @@ static const struct file_operations pnp_seq_fops = {
.llseek = seq_lseek,
.release = single_release,
};
+
+/* Create the /proc/net/ipconfig directory */
+static int __init ipconfig_proc_net_init(void)
+{
+ ipconfig_dir = proc_net_mkdir(&init_net, "ipconfig", init_net.proc_net);
+ if (!ipconfig_dir)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Create a new file under /proc/net/ipconfig */
+static int ipconfig_proc_net_create(const char *name,
+ const struct file_operations *fops)
+{
+ char *pname;
+ struct proc_dir_entry *p;
+
+ if (!ipconfig_dir)
+ return -ENOMEM;
+
+ pname = kasprintf(GFP_KERNEL, "%s%s", "ipconfig/", name);
+ if (!pname)
+ return -ENOMEM;
+
+ p = proc_create(pname, 0444, init_net.proc_net, fops);
+ kfree(pname);
+ if (!p)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */
+static int ntp_servers_seq_show(struct seq_file *seq, void *v)
+{
+ int i;
+
+ for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) {
+ if (ic_ntp_servers[i] != NONE)
+ seq_printf(seq, "%pI4\n", &ic_ntp_servers[i]);
+ }
+ return 0;
+}
+
+static int ntp_servers_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ntp_servers_seq_show, NULL);
+}
+
+static const struct file_operations ntp_servers_seq_fops = {
+ .open = ntp_servers_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif /* CONFIG_PROC_FS */
/*
@@ -1368,8 +1460,20 @@ static int __init ip_auto_config(void)
int err;
unsigned int i;
+ /* Initialise all name servers and NTP servers to NONE (but only if the
+ * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded,
+ * otherwise we'll overwrite the IP addresses specified there)
+ */
+ if (ic_set_manually == 0) {
+ ic_nameservers_predef();
+ ic_ntp_servers_predef();
+ }
+
#ifdef CONFIG_PROC_FS
proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
+
+ if (ipconfig_proc_net_init() == 0)
+ ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
@@ -1481,16 +1585,32 @@ static int __init ip_auto_config(void)
&ic_servaddr, &root_server_addr, root_server_path);
if (ic_dev_mtu)
pr_cont(", mtu=%d", ic_dev_mtu);
- for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
+ /* Name servers (if any): */
+ for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
if (ic_nameservers[i] != NONE) {
- pr_cont(" nameserver%u=%pI4",
- i, &ic_nameservers[i]);
- break;
+ if (i == 0)
+ pr_info(" nameserver%u=%pI4",
+ i, &ic_nameservers[i]);
+ else
+ pr_cont(", nameserver%u=%pI4",
+ i, &ic_nameservers[i]);
}
- for (i++; i < CONF_NAMESERVERS_MAX; i++)
- if (ic_nameservers[i] != NONE)
- pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]);
- pr_cont("\n");
+ if (i + 1 == CONF_NAMESERVERS_MAX)
+ pr_cont("\n");
+ }
+ /* NTP servers (if any): */
+ for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) {
+ if (ic_ntp_servers[i] != NONE) {
+ if (i == 0)
+ pr_info(" ntpserver%u=%pI4",
+ i, &ic_ntp_servers[i]);
+ else
+ pr_cont(", ntpserver%u=%pI4",
+ i, &ic_ntp_servers[i]);
+ }
+ if (i + 1 == CONF_NTP_SERVERS_MAX)
+ pr_cont("\n");
+ }
#endif /* !SILENT */
/*
@@ -1588,7 +1708,9 @@ static int __init ip_auto_config_setup(char *addrs)
return 1;
}
+ /* Initialise all name servers and NTP servers to NONE */
ic_nameservers_predef();
+ ic_ntp_servers_predef();
/* Parse string for static IP assignment. */
ip = addrs;
@@ -1647,6 +1769,13 @@ static int __init ip_auto_config_setup(char *addrs)
ic_nameservers[1] = NONE;
}
break;
+ case 9:
+ if (CONF_NTP_SERVERS_MAX >= 1) {
+ ic_ntp_servers[0] = in_aton(ip);
+ if (ic_ntp_servers[0] == ANY)
+ ic_ntp_servers[0] = NONE;
+ }
+ break;
}
}
ip = cp;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2fb4de3f7f66..38e092eafc97 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,8 @@ static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
};
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh, struct nlattr **tb)
+ struct fib_rule_hdr *frh, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
new file mode 100644
index 000000000000..5121c6475e6b
--- /dev/null
+++ b/net/ipv4/metrics.c
@@ -0,0 +1,53 @@
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/net_namespace.h>
+#include <net/tcp.h>
+
+int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
+ u32 *metrics)
+{
+ bool ecn_ca = false;
+ struct nlattr *nla;
+ int remaining;
+
+ if (!fc_mx)
+ return 0;
+
+ nla_for_each_attr(nla, fc_mx, fc_mx_len, remaining) {
+ int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
+ if (val == TCP_CA_UNSPEC)
+ return -EINVAL;
+ } else {
+ val = nla_get_u32(nla);
+ }
+ if (type == RTAX_ADVMSS && val > 65535 - 40)
+ val = 65535 - 40;
+ if (type == RTAX_MTU && val > 65535 - 15)
+ val = 65535 - 15;
+ if (type == RTAX_HOPLIMIT && val > 255)
+ val = 255;
+ if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+ return -EINVAL;
+ metrics[type - 1] = val;
+ }
+
+ if (ecn_ca)
+ metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_metrics_convert);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a058de677e94..261b71d0ccc5 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
+ SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
+ SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c9d00ef54dec..62b776f90037 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1702,6 +1702,139 @@ int tcp_peek_len(struct socket *sock)
}
EXPORT_SYMBOL(tcp_peek_len);
+/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
+int tcp_set_rcvlowat(struct sock *sk, int val)
+{
+ sk->sk_rcvlowat = val ? : 1;
+
+ /* Check if we need to signal EPOLLIN right now */
+ tcp_data_ready(sk);
+
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ return 0;
+
+ /* val comes from user space and might be close to INT_MAX */
+ val <<= 1;
+ if (val < 0)
+ val = INT_MAX;
+
+ val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ if (val > sk->sk_rcvbuf) {
+ sk->sk_rcvbuf = val;
+ tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(tcp_set_rcvlowat);
+
+#ifdef CONFIG_MMU
+static const struct vm_operations_struct tcp_vm_ops = {
+};
+
+int tcp_mmap(struct file *file, struct socket *sock,
+ struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+ vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+
+ /* Instruct vm_insert_page() to not down_read(mmap_sem) */
+ vma->vm_flags |= VM_MIXEDMAP;
+
+ vma->vm_ops = &tcp_vm_ops;
+ return 0;
+}
+EXPORT_SYMBOL(tcp_mmap);
+
+static int tcp_zerocopy_receive(struct sock *sk,
+ struct tcp_zerocopy_receive *zc)
+{
+ unsigned long address = (unsigned long)zc->address;
+ const skb_frag_t *frags = NULL;
+ u32 length = 0, seq, offset;
+ struct vm_area_struct *vma;
+ struct sk_buff *skb = NULL;
+ struct tcp_sock *tp;
+ int ret;
+
+ if (address & (PAGE_SIZE - 1) || address != zc->address)
+ return -EINVAL;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -ENOTCONN;
+
+ sock_rps_record_flow(sk);
+
+ down_read(&current->mm->mmap_sem);
+
+ ret = -EINVAL;
+ vma = find_vma(current->mm, address);
+ if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops)
+ goto out;
+ zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
+
+ tp = tcp_sk(sk);
+ seq = tp->copied_seq;
+ zc->length = min_t(u32, zc->length, tcp_inq(sk));
+ zc->length &= ~(PAGE_SIZE - 1);
+
+ zap_page_range(vma, address, zc->length);
+
+ zc->recv_skip_hint = 0;
+ ret = 0;
+ while (length + PAGE_SIZE <= zc->length) {
+ if (zc->recv_skip_hint < PAGE_SIZE) {
+ if (skb) {
+ skb = skb->next;
+ offset = seq - TCP_SKB_CB(skb)->seq;
+ } else {
+ skb = tcp_recv_skb(sk, seq, &offset);
+ }
+
+ zc->recv_skip_hint = skb->len - offset;
+ offset -= skb_headlen(skb);
+ if ((int)offset < 0 || skb_has_frag_list(skb))
+ break;
+ frags = skb_shinfo(skb)->frags;
+ while (offset) {
+ if (frags->size > offset)
+ goto out;
+ offset -= frags->size;
+ frags++;
+ }
+ }
+ if (frags->size != PAGE_SIZE || frags->page_offset)
+ break;
+ ret = vm_insert_page(vma, address + length,
+ skb_frag_page(frags));
+ if (ret)
+ break;
+ length += PAGE_SIZE;
+ seq += PAGE_SIZE;
+ zc->recv_skip_hint -= PAGE_SIZE;
+ frags++;
+ }
+out:
+ up_read(&current->mm->mmap_sem);
+ if (length) {
+ tp->copied_seq = seq;
+ tcp_rcv_space_adjust(sk);
+
+ /* Clean up data we have read: This will do ACK frames. */
+ tcp_recv_skb(sk, seq, &offset);
+ tcp_cleanup_rbuf(sk, length);
+ ret = 0;
+ if (length == zc->length)
+ zc->recv_skip_hint = 0;
+ } else {
+ if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE))
+ ret = -EIO;
+ }
+ zc->length = length;
+ return ret;
+}
+#endif
+
static void tcp_update_recv_tstamps(struct sk_buff *skb,
struct scm_timestamping *tss)
{
@@ -1757,6 +1890,22 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
+static int tcp_inq_hint(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ u32 copied_seq = READ_ONCE(tp->copied_seq);
+ u32 rcv_nxt = READ_ONCE(tp->rcv_nxt);
+ int inq;
+
+ inq = rcv_nxt - copied_seq;
+ if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) {
+ lock_sock(sk);
+ inq = tp->rcv_nxt - tp->copied_seq;
+ release_sock(sk);
+ }
+ return inq;
+}
+
/*
* This routine copies from a sock struct into the user buffer.
*
@@ -1773,13 +1922,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
u32 peek_seq;
u32 *seq;
unsigned long used;
- int err;
+ int err, inq;
int target; /* Read at least this many bytes */
long timeo;
struct sk_buff *skb, *last;
u32 urg_hole = 0;
struct scm_timestamping tss;
bool has_tss = false;
+ bool has_cmsg;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -1794,6 +1944,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk->sk_state == TCP_LISTEN)
goto out;
+ has_cmsg = tp->recvmsg_inq;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -1980,6 +2131,7 @@ skip_copy:
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss);
has_tss = true;
+ has_cmsg = true;
}
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
@@ -1999,13 +2151,20 @@ skip_copy:
* on connected socket. I was just happy when found this 8) --ANK
*/
- if (has_tss)
- tcp_recv_timestamp(msg, sk, &tss);
-
/* Clean up data we have read: This will do ACK frames. */
tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
+
+ if (has_cmsg) {
+ if (has_tss)
+ tcp_recv_timestamp(msg, sk, &tss);
+ if (tp->recvmsg_inq) {
+ inq = tcp_inq_hint(sk);
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ }
+ }
+
return copied;
out:
@@ -2422,6 +2581,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
+ tp->delivered_ce = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
@@ -2873,6 +3033,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->notsent_lowat = val;
sk->sk_write_space(sk);
break;
+ case TCP_INQ:
+ if (val > 1 || val < 0)
+ err = -EINVAL;
+ else
+ tp->recvmsg_inq = val;
+ break;
default:
err = -ENOPROTOOPT;
break;
@@ -3031,6 +3197,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
rate64 = tcp_compute_delivery_rate(tp);
if (rate64)
info->tcpi_delivery_rate = rate64;
+ info->tcpi_delivered = tp->delivered;
+ info->tcpi_delivered_ce = tp->delivered_ce;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3044,7 +3212,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 5 * nla_total_size(sizeof(u32)) +
+ 7 * nla_total_size(sizeof(u32)) +
3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3075,9 +3243,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+ nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
+ nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
+
return stats;
}
@@ -3293,6 +3464,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
break;
+ case TCP_INQ:
+ val = tp->recvmsg_inq;
+ break;
case TCP_SAVE_SYN:
val = tp->save_syn;
break;
@@ -3329,6 +3503,25 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
}
return 0;
}
+#ifdef CONFIG_MMU
+ case TCP_ZEROCOPY_RECEIVE: {
+ struct tcp_zerocopy_receive zc;
+ int err;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len != sizeof(zc))
+ return -EINVAL;
+ if (copy_from_user(&zc, optval, len))
+ return -EFAULT;
+ lock_sock(sk);
+ err = tcp_zerocopy_receive(sk, &zc);
+ release_sock(sk);
+ if (!err && copy_to_user(optval, &zc, len))
+ err = -EFAULT;
+ return err;
+ }
+#endif
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e51c644484dc..b188e0d75edd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -111,6 +111,25 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define REXMIT_LOST 1 /* retransmit packets marked lost */
#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+static DEFINE_STATIC_KEY_FALSE(clean_acked_data_enabled);
+
+void clean_acked_data_enable(struct inet_connection_sock *icsk,
+ void (*cad)(struct sock *sk, u32 ack_seq))
+{
+ icsk->icsk_clean_acked = cad;
+ static_branch_inc(&clean_acked_data_enabled);
+}
+EXPORT_SYMBOL_GPL(clean_acked_data_enable);
+
+void clean_acked_data_disable(struct inet_connection_sock *icsk)
+{
+ static_branch_dec(&clean_acked_data_enabled);
+ icsk->icsk_clean_acked = NULL;
+}
+EXPORT_SYMBOL_GPL(clean_acked_data_disable);
+#endif
+
static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
unsigned int len)
{
@@ -582,6 +601,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
u32 copied;
int time;
+ trace_tcp_rcv_space_adjust(sk);
+
tcp_mstamp_refresh(tp);
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
@@ -3496,6 +3517,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
tcp_xmit_retransmit_queue(sk);
}
+/* Returns the number of packets newly acked or sacked by the current ACK */
+static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
+{
+ const struct net *net = sock_net(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 delivered;
+
+ delivered = tp->delivered - prior_delivered;
+ NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
+ if (flag & FLAG_ECE) {
+ tp->delivered_ce += delivered;
+ NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
+ }
+ return delivered;
+}
+
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@@ -3542,6 +3579,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
icsk->icsk_retransmits = 0;
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+ if (static_branch_unlikely(&clean_acked_data_enabled))
+ if (icsk->icsk_clean_acked)
+ icsk->icsk_clean_acked(sk, ack);
+#endif
}
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
@@ -3619,7 +3662,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk);
- delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
+ delivered = tcp_newly_delivered(sk, delivered, flag);
lost = tp->lost - lost; /* freshly marked lost */
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
@@ -3629,9 +3672,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
- if (flag & FLAG_DSACKING_ACK)
+ if (flag & FLAG_DSACKING_ACK) {
tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
&rexmit);
+ tcp_newly_delivered(sk, delivered, flag);
+ }
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3655,6 +3700,7 @@ old_ack:
&sack_state);
tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
&rexmit);
+ tcp_newly_delivered(sk, delivered, flag);
tcp_xmit_recovery(sk, rexmit);
}
@@ -4573,6 +4619,17 @@ err:
}
+void tcp_data_ready(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ int avail = tp->rcv_nxt - tp->copied_seq;
+
+ if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
+ return;
+
+ sk->sk_data_ready(sk);
+}
+
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4630,7 +4687,7 @@ queue_and_out:
if (eaten > 0)
kfree_skb_partial(skb, fragstolen);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ tcp_data_ready(sk);
return;
}
@@ -5023,9 +5080,12 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
/* ... and right edge of window advances far enough.
- * (tcp_recvmsg() will send ACK otherwise). Or...
+ * (tcp_recvmsg() will send ACK otherwise).
+ * If application uses SO_RCVLOWAT, we want send ack now if
+ * we have not received enough bytes to satisfy the condition.
*/
- __tcp_select_window(sk) >= tp->rcv_wnd) ||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
+ __tcp_select_window(sk) >= tp->rcv_wnd)) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* We have out of order data. */
@@ -5428,7 +5488,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
no_ack:
if (eaten)
kfree_skb_partial(skb, fragstolen);
- sk->sk_data_ready(sk);
+ tcp_data_ready(sk);
return;
}
}
@@ -5550,9 +5610,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return true;
}
tp->syn_data_acked = tp->syn_data;
- if (tp->syn_data_acked)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVE);
+ if (tp->syn_data_acked) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+ /* SYN-data is counted as two separate packets in tcp_ack() */
+ if (tp->delivered > 1)
+ --tp->delivered;
+ }
tcp_fastopen_add_skb(sk, synack);
@@ -5884,6 +5947,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
}
switch (sk->sk_state) {
case TCP_SYN_RECV:
+ tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383cac0ff0ec..d07c0dcc99aa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -229,11 +229,9 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
}
}
- if (mss > (1 << *rcv_wscale)) {
- if (!init_rcv_wnd) /* Use default unless specified otherwise */
- init_rcv_wnd = tcp_default_init_rwnd(mss);
- *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
- }
+ if (!init_rcv_wnd) /* Use default unless specified otherwise */
+ init_rcv_wnd = tcp_default_init_rwnd(mss);
+ *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
/* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
@@ -585,14 +583,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
+ *md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ }
}
-#else
- *md5 = NULL;
#endif
/* We always get an MSS option. The option bytes which will be seen in
@@ -720,14 +719,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
+ *md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (unlikely(*md5)) {
- opts->options |= OPTION_MD5;
- size += TCPOLEN_MD5SIG_ALIGNED;
+ if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
+ }
}
-#else
- *md5 = NULL;
#endif
if (likely(tp->rx_opt.tstamp_ok)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24b5c59b1c53..dd3102a37ef9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
}
EXPORT_SYMBOL(udp_set_csum);
-static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
+static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+ struct inet_cork *cork)
{
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -777,6 +778,24 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->len = htons(len);
uh->check = 0;
+ if (cork->gso_size) {
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+ if (hlen + cork->gso_size > cork->fragsize)
+ return -EINVAL;
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (sk->sk_no_check_tx)
+ return -EINVAL;
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ return -EIO;
+
+ skb_shinfo(skb)->gso_size = cork->gso_size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ goto csum_partial;
+ }
+
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
@@ -786,6 +805,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+csum_partial:
udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
goto send;
@@ -828,7 +848,7 @@ int udp_push_pending_frames(struct sock *sk)
if (!skb)
goto out;
- err = udp_send_skb(skb, fl4);
+ err = udp_send_skb(skb, fl4, &inet->cork.base);
out:
up->len = 0;
@@ -837,6 +857,43 @@ out:
}
EXPORT_SYMBOL(udp_push_pending_frames);
+static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size)
+{
+ switch (cmsg->cmsg_type) {
+ case UDP_SEGMENT:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
+ return -EINVAL;
+ *gso_size = *(__u16 *)CMSG_DATA(cmsg);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size)
+{
+ struct cmsghdr *cmsg;
+ bool need_ip = false;
+ int err;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ if (cmsg->cmsg_level != SOL_UDP) {
+ need_ip = true;
+ continue;
+ }
+
+ err = __udp_cmsg_send(cmsg, gso_size);
+ if (err)
+ return err;
+ }
+
+ return need_ip;
+}
+EXPORT_SYMBOL_GPL(udp_cmsg_send);
+
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
@@ -922,10 +979,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.sockc.tsflags = sk->sk_tsflags;
ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
+ ipc.gso_size = up->gso_size;
if (msg->msg_controllen) {
- err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);
- if (unlikely(err)) {
+ err = udp_cmsg_send(sk, msg, &ipc.gso_size);
+ if (err > 0)
+ err = ip_cmsg_send(sk, msg, &ipc,
+ sk->sk_family == AF_INET6);
+ if (unlikely(err < 0)) {
kfree(ipc.opt);
return err;
}
@@ -1030,12 +1091,14 @@ back_from_confirm:
/* Lockless fast path for the non-corking case. */
if (!corkreq) {
+ struct inet_cork cork;
+
skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc, &rt,
- msg->msg_flags);
+ &cork, msg->msg_flags);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_send_skb(skb, fl4);
+ err = udp_send_skb(skb, fl4, &cork);
goto out;
}
@@ -2365,6 +2428,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
up->no_check6_rx = valbool;
break;
+ case UDP_SEGMENT:
+ if (val < 0 || val > USHRT_MAX)
+ return -EINVAL;
+ up->gso_size = val;
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
@@ -2455,6 +2524,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
val = up->no_check6_rx;
break;
+ case UDP_SEGMENT:
+ val = up->gso_size;
+ break;
+
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV:
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ea6e6e7df0ee..006257092f06 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,6 +187,68 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
+struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features,
+ unsigned int mss, __sum16 check)
+{
+ struct sock *sk = gso_skb->sk;
+ unsigned int sum_truesize = 0;
+ struct sk_buff *segs, *seg;
+ unsigned int hdrlen;
+ struct udphdr *uh;
+
+ if (gso_skb->len <= sizeof(*uh) + mss)
+ return ERR_PTR(-EINVAL);
+
+ hdrlen = gso_skb->data - skb_mac_header(gso_skb);
+ skb_pull(gso_skb, sizeof(*uh));
+
+ /* clear destructor to avoid skb_segment assigning it to tail */
+ WARN_ON_ONCE(gso_skb->destructor != sock_wfree);
+ gso_skb->destructor = NULL;
+
+ segs = skb_segment(gso_skb, features);
+ if (unlikely(IS_ERR_OR_NULL(segs))) {
+ gso_skb->destructor = sock_wfree;
+ return segs;
+ }
+
+ for (seg = segs; seg; seg = seg->next) {
+ uh = udp_hdr(seg);
+ uh->len = htons(seg->len - hdrlen);
+ uh->check = check;
+
+ /* last packet can be partial gso_size */
+ if (!seg->next)
+ csum_replace2(&uh->check, htons(mss),
+ htons(seg->len - hdrlen - sizeof(*uh)));
+
+ uh->check = ~uh->check;
+ seg->destructor = sock_wfree;
+ seg->sk = sk;
+ sum_truesize += seg->truesize;
+ }
+
+ refcount_add(sum_truesize - gso_skb->truesize, &sk->sk_wmem_alloc);
+
+ return segs;
+}
+EXPORT_SYMBOL_GPL(__udp_gso_segment);
+
+static struct sk_buff *__udp4_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features)
+{
+ const struct iphdr *iph = ip_hdr(gso_skb);
+ unsigned int mss = skb_shinfo(gso_skb)->gso_size;
+
+ if (!can_checksum_protocol(features, htons(ETH_P_IP)))
+ return ERR_PTR(-EIO);
+
+ return __udp_gso_segment(gso_skb, features, mss,
+ udp_v4_check(sizeof(struct udphdr) + mss,
+ iph->saddr, iph->daddr, 0));
+}
+
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -203,12 +265,15 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
goto out;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ return __udp4_gso_segment(skb, features);
+
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 78cef00c9596..fbfd71a2d9c8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev,
unsigned long event);
static int addrconf_ifdown(struct net_device *dev, int how);
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
const struct net_device *dev,
u32 flags, u32 noflags);
@@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
pr_warn("Freeing alive inet6 address %p\n", ifp);
return;
}
- ip6_rt_put(ifp->rt);
kfree_rcu(ifp, rcu);
}
@@ -995,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
- struct rt6_info *rt = NULL;
+ struct fib6_info *f6i = NULL;
int err = 0;
int addr_type = ipv6_addr_type(addr);
@@ -1037,16 +1036,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
- rt = addrconf_dst_alloc(idev, addr, false);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- rt = NULL;
+ f6i = addrconf_f6i_alloc(net, idev, addr, false, gfp_flags);
+ if (IS_ERR(f6i)) {
+ err = PTR_ERR(f6i);
+ f6i = NULL;
goto out;
}
if (net->ipv6.devconf_all->disable_policy ||
idev->cnf.disable_policy)
- rt->dst.flags |= DST_NOPOLICY;
+ f6i->dst_nopolicy = true;
neigh_parms_data_state_setall(idev->nd_parms);
@@ -1068,7 +1067,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa->cstamp = ifa->tstamp = jiffies;
ifa->tokenized = false;
- ifa->rt = rt;
+ ifa->rt = f6i;
ifa->idev = idev;
in6_dev_hold(idev);
@@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
inet6addr_notifier_call_chain(NETDEV_UP, ifa);
out:
if (unlikely(err < 0)) {
- if (rt)
- ip6_rt_put(rt);
+ fib6_info_release(f6i);
+
if (ifa) {
if (ifa->idev)
in6_dev_put(ifa->idev);
@@ -1179,19 +1178,19 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
{
- struct rt6_info *rt;
+ struct fib6_info *f6i;
- rt = addrconf_get_prefix_route(&ifp->addr,
+ f6i = addrconf_get_prefix_route(&ifp->addr,
ifp->prefix_len,
ifp->idev->dev,
0, RTF_GATEWAY | RTF_DEFAULT);
- if (rt) {
+ if (f6i) {
if (del_rt)
- ip6_del_rt(rt);
+ ip6_del_rt(dev_net(ifp->idev->dev), f6i);
else {
- if (!(rt->rt6i_flags & RTF_EXPIRES))
- rt6_set_expires(rt, expires);
- ip6_rt_put(rt);
+ if (!(f6i->fib6_flags & RTF_EXPIRES))
+ fib6_set_expires(f6i, expires);
+ fib6_info_release(f6i);
}
}
}
@@ -2320,7 +2319,7 @@ static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
static void
addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
- unsigned long expires, u32 flags)
+ unsigned long expires, u32 flags, gfp_t gfp_flags)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
@@ -2331,6 +2330,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
.fc_flags = RTF_UP | flags,
.fc_nlinfo.nl_net = dev_net(dev),
.fc_protocol = RTPROT_KERNEL,
+ .fc_type = RTN_UNICAST,
};
cfg.fc_dst = *pfx;
@@ -2344,17 +2344,17 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
cfg.fc_flags |= RTF_NONEXTHOP;
#endif
- ip6_route_add(&cfg, NULL);
+ ip6_route_add(&cfg, gfp_flags, NULL);
}
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
const struct net_device *dev,
u32 flags, u32 noflags)
{
struct fib6_node *fn;
- struct rt6_info *rt = NULL;
+ struct fib6_info *rt = NULL;
struct fib6_table *table;
u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
@@ -2368,14 +2368,13 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->dst.dev->ifindex != dev->ifindex)
+ if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
continue;
- if ((rt->rt6i_flags & flags) != flags)
+ if ((rt->fib6_flags & flags) != flags)
continue;
- if ((rt->rt6i_flags & noflags) != 0)
+ if ((rt->fib6_flags & noflags) != 0)
continue;
- if (!dst_hold_safe(&rt->dst))
- rt = NULL;
+ fib6_info_hold(rt);
break;
}
out:
@@ -2394,12 +2393,13 @@ static void addrconf_add_mroute(struct net_device *dev)
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
.fc_flags = RTF_UP,
+ .fc_type = RTN_UNICAST,
.fc_nlinfo.nl_net = dev_net(dev),
};
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
- ip6_route_add(&cfg, NULL);
+ ip6_route_add(&cfg, GFP_ATOMIC, NULL);
}
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
@@ -2529,7 +2529,6 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
if (IS_ERR_OR_NULL(ifp))
return -1;
- update_lft = 0;
create = 1;
spin_lock_bh(&ifp->lock);
ifp->flags |= IFA_F_MANAGETEMPADDR;
@@ -2551,7 +2550,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
- if (!update_lft && !create && stored_lft) {
+ if (!create && stored_lft) {
const u32 minimum_lft = min_t(u32,
stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
@@ -2642,7 +2641,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
*/
if (pinfo->onlink) {
- struct rt6_info *rt;
+ struct fib6_info *rt;
unsigned long rt_expires;
/* Avoid arithmetic overflow. Really, we could
@@ -2667,13 +2666,13 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (rt) {
/* Autoconf prefix route */
if (valid_lft == 0) {
- ip6_del_rt(rt);
+ ip6_del_rt(net, rt);
rt = NULL;
} else if (addrconf_finite_timeout(rt_expires)) {
/* not infinity */
- rt6_set_expires(rt, jiffies + rt_expires);
+ fib6_set_expires(rt, jiffies + rt_expires);
} else {
- rt6_clean_expires(rt);
+ fib6_clean_expires(rt);
}
} else if (valid_lft) {
clock_t expires = 0;
@@ -2684,9 +2683,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
expires = jiffies_to_clock_t(rt_expires);
}
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
- dev, expires, flags);
+ dev, expires, flags, GFP_ATOMIC);
}
- ip6_rt_put(rt);
+ fib6_info_release(rt);
}
/* Try to figure out our local address for this prefix */
@@ -2899,9 +2898,14 @@ static int inet6_addr_add(struct net *net, int ifindex,
if (!IS_ERR(ifp)) {
if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
- expires, flags);
+ expires, flags, GFP_KERNEL);
}
+ /* Send a netlink notification if DAD is enabled and
+ * optimistic flag is not set
+ */
+ if (!(ifp->flags & (IFA_F_OPTIMISTIC | IFA_F_NODAD)))
+ ipv6_ifa_notify(0, ifp);
/*
* Note that section 3.1 of RFC 4429 indicates
* that the Optimistic flag should not be set for
@@ -3047,7 +3051,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (addr.s6_addr32[3]) {
add_addr(idev, &addr, plen, scope);
- addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);
+ addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags,
+ GFP_ATOMIC);
return;
}
@@ -3072,7 +3077,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
add_addr(idev, &addr, plen, flag);
addrconf_prefix_route(&addr, plen, idev->dev, 0,
- pflags);
+ pflags, GFP_ATOMIC);
}
}
}
@@ -3112,7 +3117,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
if (!IS_ERR(ifp)) {
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev,
+ 0, 0, GFP_ATOMIC);
addrconf_dad_start(ifp);
in6_ifa_put(ifp);
}
@@ -3227,7 +3233,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
addrconf_add_linklocal(idev, &addr,
IFA_F_STABLE_PRIVACY);
else if (prefix_route)
- addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ addrconf_prefix_route(&addr, 64, idev->dev,
+ 0, 0, GFP_KERNEL);
break;
case IN6_ADDR_GEN_MODE_EUI64:
/* addrconf_add_linklocal also adds a prefix_route and we
@@ -3237,7 +3244,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
addrconf_add_linklocal(idev, &addr, 0);
else if (prefix_route)
- addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ addrconf_prefix_route(&addr, 64, idev->dev,
+ 0, 0, GFP_KERNEL);
break;
case IN6_ADDR_GEN_MODE_NONE:
default:
@@ -3329,32 +3337,34 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
-static int fixup_permanent_addr(struct inet6_dev *idev,
+static int fixup_permanent_addr(struct net *net,
+ struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- /* !rt6i_node means the host route was removed from the
+ /* !fib6_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
- if (!ifp->rt || !ifp->rt->rt6i_node) {
- struct rt6_info *rt, *prev;
+ if (!ifp->rt || !ifp->rt->fib6_node) {
+ struct fib6_info *f6i, *prev;
- rt = addrconf_dst_alloc(idev, &ifp->addr, false);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
+ f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false,
+ GFP_ATOMIC);
+ if (IS_ERR(f6i))
+ return PTR_ERR(f6i);
/* ifp->rt can be accessed outside of rtnl */
spin_lock(&ifp->lock);
prev = ifp->rt;
- ifp->rt = rt;
+ ifp->rt = f6i;
spin_unlock(&ifp->lock);
- ip6_rt_put(prev);
+ fib6_info_release(prev);
}
if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
- idev->dev, 0, 0);
+ idev->dev, 0, 0, GFP_ATOMIC);
}
if (ifp->state == INET6_IFADDR_STATE_PREDAD)
@@ -3363,7 +3373,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
return 0;
}
-static void addrconf_permanent_addr(struct net_device *dev)
+static void addrconf_permanent_addr(struct net *net, struct net_device *dev)
{
struct inet6_ifaddr *ifp, *tmp;
struct inet6_dev *idev;
@@ -3376,7 +3386,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
if ((ifp->flags & IFA_F_PERMANENT) &&
- fixup_permanent_addr(idev, ifp) < 0) {
+ fixup_permanent_addr(net, idev, ifp) < 0) {
write_unlock_bh(&idev->lock);
in6_ifa_hold(ifp);
ipv6_del_addr(ifp);
@@ -3445,7 +3455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (event == NETDEV_UP) {
/* restore routes for permanent addresses */
- addrconf_permanent_addr(dev);
+ addrconf_permanent_addr(net, dev);
if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
@@ -3612,8 +3622,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
- int _keep_addr;
- bool keep_addr;
+ bool keep_addr = false;
int state, i;
ASSERT_RTNL();
@@ -3639,15 +3648,18 @@ static int addrconf_ifdown(struct net_device *dev, int how)
}
- /* aggregate the system setting and interface setting */
- _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
- if (!_keep_addr)
- _keep_addr = idev->cnf.keep_addr_on_down;
-
/* combine the user config with event to determine if permanent
* addresses are to be removed from address hash table
*/
- keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
+ if (!how && !idev->cnf.disable_ipv6) {
+ /* aggregate the system setting and interface setting */
+ int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+
+ if (!_keep_addr)
+ _keep_addr = idev->cnf.keep_addr_on_down;
+
+ keep_addr = (_keep_addr > 0);
+ }
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3697,13 +3709,8 @@ restart:
write_lock_bh(&idev->lock);
}
- /* re-combine the user config with event to determine if permanent
- * addresses are to be removed from the interface list
- */
- keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
-
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
- struct rt6_info *rt = NULL;
+ struct fib6_info *rt = NULL;
bool keep;
addrconf_del_dad_work(ifa);
@@ -3731,7 +3738,7 @@ restart:
spin_unlock_bh(&ifa->lock);
if (rt)
- ip6_del_rt(rt);
+ ip6_del_rt(net, rt);
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -3849,6 +3856,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
bool bump_id, notify = false;
+ struct net *net;
addrconf_join_solict(dev, &ifp->addr);
@@ -3859,8 +3867,9 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
if (ifp->state == INET6_IFADDR_STATE_DEAD)
goto out;
+ net = dev_net(dev);
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
+ (net->ipv6.devconf_all->accept_dad < 1 &&
idev->cnf.accept_dad < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
@@ -3896,8 +3905,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
* Frames right away
*/
if (ifp->flags & IFA_F_OPTIMISTIC) {
- ip6_ins_rt(ifp->rt);
- if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
+ ip6_ins_rt(net, ifp->rt);
+ if (ipv6_use_optimistic_addr(net, idev)) {
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
@@ -4563,8 +4572,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
ipv6_ifa_notify(0, ifp);
if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
- expires, flags);
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->idev->dev, expires, flags,
+ GFP_KERNEL);
} else if (had_prefixroute) {
enum cleanup_prefix_rt_t action;
unsigned long rt_expires;
@@ -4804,9 +4814,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
u32 portid, u32 seq, int event, unsigned int flags)
{
+ struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
+ int ifindex = dev ? dev->ifindex : 1;
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
- int ifindex = ifaca->aca_idev->dev->ifindex;
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
@@ -5029,14 +5040,6 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
struct net *net = dev_net(ifa->idev->dev);
int err = -ENOBUFS;
- /* Don't send DELADDR notification for TENTATIVE address,
- * since NEWADDR notification is sent only after removing
- * TENTATIVE flag, if DAD has not failed.
- */
- if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
- event == RTM_DELADDR)
- return;
-
skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -5607,29 +5610,30 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
* our DAD process, so we don't need
* to do it again
*/
- if (!rcu_access_pointer(ifp->rt->rt6i_node))
- ip6_ins_rt(ifp->rt);
+ if (!rcu_access_pointer(ifp->rt->fib6_node))
+ ip6_ins_rt(net, ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
addrconf_prefix_route(&ifp->peer_addr, 128,
- ifp->idev->dev, 0, 0);
+ ifp->idev->dev, 0, 0,
+ GFP_ATOMIC);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
if (!ipv6_addr_any(&ifp->peer_addr)) {
- struct rt6_info *rt;
+ struct fib6_info *rt;
rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
ifp->idev->dev, 0, 0);
if (rt)
- ip6_del_rt(rt);
+ ip6_del_rt(net, rt);
}
if (ifp->rt) {
- if (dst_hold_safe(&ifp->rt->dst))
- ip6_del_rt(ifp->rt);
+ ip6_del_rt(net, ifp->rt);
+ ifp->rt = NULL;
}
rt_genid_bump_ipv6(net);
break;
@@ -5976,11 +5980,11 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
- struct rt6_info *rt = ifa->rt;
+ struct fib6_info *rt = ifa->rt;
int cpu;
rcu_read_lock();
- addrconf_set_nopolicy(ifa->rt, val);
+ ifa->rt->dst_nopolicy = val ? true : false;
if (rt->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
struct rt6_info **rtp;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8da0b513f188..d0af96e0d109 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -273,33 +273,8 @@ out_rcu_unlock:
goto out;
}
-
-/* bind for INET6 API */
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- int err = 0;
-
- /* If the socket has its own bind function then use it. */
- if (sk->sk_prot->bind)
- return sk->sk_prot->bind(sk, uaddr, addr_len);
-
- if (addr_len < SIN6_LEN_RFC2133)
- return -EINVAL;
-
- /* BPF prog is run before any checks are done so that if the prog
- * changes context in a wrong way it will be caught.
- */
- err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
- if (err)
- return err;
-
- return __inet6_bind(sk, uaddr, addr_len, false, true);
-}
-EXPORT_SYMBOL(inet6_bind);
-
-int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock)
+static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
{
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -444,6 +419,30 @@ out_unlock:
goto out;
}
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ /* If the socket has its own bind function then use it. */
+ if (sk->sk_prot->bind)
+ return sk->sk_prot->bind(sk, uaddr, addr_len);
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
int inet6_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -579,7 +578,9 @@ const struct proto_ops inet6_stream_ops = {
.getsockopt = sock_common_getsockopt, /* ok */
.sendmsg = inet_sendmsg, /* ok */
.recvmsg = inet_recvmsg, /* ok */
- .mmap = sock_no_mmap,
+#ifdef CONFIG_MMU
+ .mmap = tcp_mmap,
+#endif
.sendpage = inet_sendpage,
.sendmsg_locked = tcp_sendmsg_locked,
.sendpage_locked = tcp_sendpage_locked,
@@ -590,6 +591,7 @@ const struct proto_ops inet6_stream_ops = {
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
+ .set_rcvlowat = tcp_set_rcvlowat,
};
const struct proto_ops inet6_dgram_ops = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bbcabbba9bd8..0250d199e527 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -212,16 +212,14 @@ static void aca_get(struct ifacaddr6 *aca)
static void aca_put(struct ifacaddr6 *ac)
{
if (refcount_dec_and_test(&ac->aca_refcnt)) {
- in6_dev_put(ac->aca_idev);
- dst_release(&ac->aca_rt->dst);
+ fib6_info_release(ac->aca_rt);
kfree(ac);
}
}
-static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
+static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
const struct in6_addr *addr)
{
- struct inet6_dev *idev = rt->rt6i_idev;
struct ifacaddr6 *aca;
aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
@@ -229,9 +227,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
return NULL;
aca->aca_addr = *addr;
- in6_dev_hold(idev);
- aca->aca_idev = idev;
- aca->aca_rt = rt;
+ fib6_info_hold(f6i);
+ aca->aca_rt = f6i;
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -246,7 +243,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca;
- struct rt6_info *rt;
+ struct fib6_info *f6i;
+ struct net *net;
int err;
ASSERT_RTNL();
@@ -265,14 +263,15 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
}
}
- rt = addrconf_dst_alloc(idev, addr, true);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
+ net = dev_net(idev->dev);
+ f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+ if (IS_ERR(f6i)) {
+ err = PTR_ERR(f6i);
goto out;
}
- aca = aca_alloc(rt, addr);
+ aca = aca_alloc(f6i, addr);
if (!aca) {
- ip6_rt_put(rt);
+ fib6_info_release(f6i);
err = -ENOMEM;
goto out;
}
@@ -286,7 +285,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca_get(aca);
write_unlock_bh(&idev->lock);
- ip6_ins_rt(rt);
+ ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -328,8 +327,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
write_unlock_bh(&idev->lock);
addrconf_leave_solict(idev, &aca->aca_addr);
- dst_hold(&aca->aca_rt->dst);
- ip6_del_rt(aca->aca_rt);
+ ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
aca_put(aca);
return 0;
@@ -356,8 +354,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
addrconf_leave_solict(idev, &aca->aca_addr);
- dst_hold(&aca->aca_rt->dst);
- ip6_del_rt(aca->aca_rt);
+ ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
aca_put(aca);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bc68eb661970..5bc2bf3733ab 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -280,6 +280,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = {
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
struct inet6_skb_parm *opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
__u16 dstbuf;
@@ -291,7 +292,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+ __IP6_INC_STATS(dev_net(dst->dev), idev,
IPSTATS_MIB_INHDRERRORS);
fail_and_free:
kfree_skb(skb);
@@ -319,8 +320,7 @@ fail_and_free:
return 1;
}
- __IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
return -1;
}
@@ -416,8 +416,7 @@ looped_back:
}
if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((&hdr->segments_left) -
skb_network_header(skb)));
@@ -456,8 +455,7 @@ looped_back:
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
ICMPV6_EXC_HOPLIMIT, 0);
kfree_skb(skb);
@@ -481,10 +479,10 @@ looped_back:
/* called with rcu_read_lock() */
static int ipv6_rthdr_rcv(struct sk_buff *skb)
{
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
struct in6_addr daddr;
- struct inet6_dev *idev;
int n, i;
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
@@ -498,8 +496,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -508,8 +505,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -527,7 +523,7 @@ looped_back:
* processed by own
*/
if (!addr) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ __IP6_INC_STATS(net, idev,
IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
@@ -553,8 +549,7 @@ looped_back:
goto unknown_rh;
/* Silently discard invalid RTH type 2 */
if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -572,8 +567,7 @@ looped_back:
n = hdr->hdrlen >> 1;
if (hdr->segments_left > n) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((&hdr->segments_left) -
skb_network_header(skb)));
@@ -609,14 +603,12 @@ looped_back:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&ipv6_hdr(skb)->saddr,
IPPROTO_ROUTING) < 0) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -627,8 +619,7 @@ looped_back:
}
if (ipv6_addr_is_multicast(addr)) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -647,8 +638,7 @@ looped_back:
if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0);
kfree_skb(skb);
@@ -663,7 +653,7 @@ looped_back:
return -1;
unknown_rh:
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
(&hdr->type) - skb_network_header(skb));
return -1;
@@ -755,34 +745,31 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
+ struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
struct net *net = ipv6_skb_net(skb);
u32 pkt_len;
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
nh[optoff+1]);
- __IP6_INC_STATS(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
- __IP6_INC_STATS(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return false;
}
if (ipv6_hdr(skb)->payload_len) {
- __IP6_INC_STATS(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- __IP6_INC_STATS(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index df113c7b5fc8..6547fc6491a6 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -245,15 +245,18 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
int err = -EINVAL;
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
- if (rule->table == RT6_TABLE_UNSPEC)
+ if (rule->table == RT6_TABLE_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Invalid table");
goto errout;
+ }
if (fib6_new_table(net, rule->table) == NULL) {
err = -ENOBUFS;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index deab2db6692e..6421c893466e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly;
struct fib6_cleaner {
struct fib6_walker w;
struct net *net;
- int (*func)(struct rt6_info *, void *arg);
+ int (*func)(struct fib6_info *, void *arg);
int sernum;
void *arg;
};
@@ -54,7 +54,7 @@ struct fib6_cleaner {
#define FWS_INIT FWS_L
#endif
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
struct fib6_table *table,
struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net,
@@ -105,13 +105,12 @@ enum {
FIB6_NO_SERNUM_CHANGE = 0,
};
-void fib6_update_sernum(struct rt6_info *rt)
+void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
{
- struct net *net = dev_net(rt->dst.dev);
struct fib6_node *fn;
- fn = rcu_dereference_protected(rt->rt6i_node,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ fn = rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock));
if (fn)
fn->fn_sernum = fib6_new_sernum(net);
}
@@ -146,6 +145,69 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+{
+ struct fib6_info *f6i;
+
+ f6i = kzalloc(sizeof(*f6i), gfp_flags);
+ if (!f6i)
+ return NULL;
+
+ f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+ if (!f6i->rt6i_pcpu) {
+ kfree(f6i);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&f6i->fib6_siblings);
+ f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+
+ atomic_inc(&f6i->fib6_ref);
+
+ return f6i;
+}
+
+void fib6_info_destroy(struct fib6_info *f6i)
+{
+ struct rt6_exception_bucket *bucket;
+ struct dst_metrics *m;
+
+ WARN_ON(f6i->fib6_node);
+
+ bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
+ if (bucket) {
+ f6i->rt6i_exception_bucket = NULL;
+ kfree(bucket);
+ }
+
+ if (f6i->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ *ppcpu_rt = NULL;
+ }
+ }
+ }
+
+ if (f6i->fib6_nh.nh_dev)
+ dev_put(f6i->fib6_nh.nh_dev);
+
+ m = f6i->fib6_metrics;
+ if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+ kfree(m);
+
+ kfree(f6i);
+}
+EXPORT_SYMBOL_GPL(fib6_info_destroy);
+
static struct fib6_node *node_alloc(struct net *net)
{
struct fib6_node *fn;
@@ -176,28 +238,6 @@ static void node_free(struct net *net, struct fib6_node *fn)
net->ipv6.rt6_stats->fib_nodes--;
}
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
-{
- int cpu;
-
- if (!non_pcpu_rt->rt6i_pcpu)
- return;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **ppcpu_rt;
- struct rt6_info *pcpu_rt;
-
- ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
- dst_dev_put(&pcpu_rt->dst);
- dst_release(&pcpu_rt->dst);
- *ppcpu_rt = NULL;
- }
- }
-}
-EXPORT_SYMBOL_GPL(rt6_free_pcpu);
-
static void fib6_free_table(struct fib6_table *table)
{
inetpeer_invalidate_tree(&table->tb6_peers);
@@ -232,7 +272,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
if (table) {
table->tb6_id = id;
rcu_assign_pointer(table->tb6_root.leaf,
- net->ipv6.ip6_null_entry);
+ net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
}
@@ -340,7 +380,7 @@ unsigned int fib6_tables_seq_read(struct net *net)
static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
- struct rt6_info *rt)
+ struct fib6_info *rt)
{
struct fib6_entry_notifier_info info = {
.rt = rt,
@@ -351,7 +391,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
static int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
- struct rt6_info *rt,
+ struct fib6_info *rt,
struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
@@ -359,7 +399,7 @@ static int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
- rt->rt6i_table->fib_seq++;
+ rt->fib6_table->fib_seq++;
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -368,16 +408,16 @@ struct fib6_dump_arg {
struct notifier_block *nb;
};
-static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
- if (rt == arg->net->ipv6.ip6_null_entry)
+ if (rt == arg->net->ipv6.fib6_null_entry)
return;
call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
}
static int fib6_node_dump(struct fib6_walker *w)
{
- struct rt6_info *rt;
+ struct fib6_info *rt;
for_each_fib6_walker_rt(w)
fib6_rt_dump(rt, w->args);
@@ -426,7 +466,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
static int fib6_dump_node(struct fib6_walker *w)
{
int res;
- struct rt6_info *rt;
+ struct fib6_info *rt;
for_each_fib6_walker_rt(w) {
res = rt6_dump_route(rt, w->args);
@@ -441,10 +481,10 @@ static int fib6_dump_node(struct fib6_walker *w)
* last sibling of this route (no need to dump the
* sibling routes again)
*/
- if (rt->rt6i_nsiblings)
- rt = list_last_entry(&rt->rt6i_siblings,
- struct rt6_info,
- rt6i_siblings);
+ if (rt->fib6_nsiblings)
+ rt = list_last_entry(&rt->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
w->leaf = NULL;
return 0;
@@ -579,6 +619,24 @@ out:
return res;
}
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
+{
+ if (!f6i)
+ return;
+
+ if (f6i->fib6_metrics == &dst_default_metrics) {
+ struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
+
+ if (!p)
+ return;
+
+ refcount_set(&p->refcnt, 1);
+ f6i->fib6_metrics = p;
+ }
+
+ f6i->fib6_metrics->metrics[metric - 1] = val;
+}
+
/*
* Routing Table
*
@@ -608,7 +666,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
fn = root;
do {
- struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&table->tb6_lock));
key = (struct rt6key *)((u8 *)leaf + offset);
@@ -637,11 +695,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
/* clean up an intermediate node */
if (!(fn->fn_flags & RTN_RTINFO)) {
RCU_INIT_POINTER(fn->leaf, NULL);
- rt6_release(leaf);
+ fib6_info_release(leaf);
/* remove null_entry in the root node */
} else if (fn->fn_flags & RTN_TL_ROOT &&
rcu_access_pointer(fn->leaf) ==
- net->ipv6.ip6_null_entry) {
+ net->ipv6.fib6_null_entry) {
RCU_INIT_POINTER(fn->leaf, NULL);
}
@@ -750,7 +808,7 @@ insert_above:
RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
atomic_inc(&rcu_dereference_protected(in->leaf,
- lockdep_is_held(&table->tb6_lock))->rt6i_ref);
+ lockdep_is_held(&table->tb6_lock))->fib6_ref);
/* update parent pointer */
if (dir)
@@ -802,44 +860,37 @@ insert_above:
return ln;
}
-static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
-{
- int i;
-
- for (i = 0; i < RTAX_MAX; i++) {
- if (test_bit(i, mxc->mx_valid))
- mp[i] = mxc->mx[i];
- }
-}
-
-static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+ const struct fib6_table *table)
{
- if (!mxc->mx)
- return 0;
-
- if (dst->flags & DST_HOST) {
- u32 *mp = dst_metrics_write_ptr(dst);
+ int cpu;
- if (unlikely(!mp))
- return -ENOMEM;
+ /* release the reference to this fib entry from
+ * all of its cached pcpu routes
+ */
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
- fib6_copy_metrics(mp, mxc);
- } else {
- dst_init_metrics(dst, mxc->mx, false);
+ ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ struct fib6_info *from;
- /* We've stolen mx now. */
- mxc->mx = NULL;
+ from = rcu_dereference_protected(pcpu_rt->from,
+ lockdep_is_held(&table->tb6_lock));
+ rcu_assign_pointer(pcpu_rt->from, NULL);
+ fib6_info_release(from);
+ }
}
-
- return 0;
}
-static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
+static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct net *net)
{
- struct fib6_table *table = rt->rt6i_table;
+ struct fib6_table *table = rt->fib6_table;
- if (atomic_read(&rt->rt6i_ref) != 1) {
+ if (atomic_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
* which must be released in time. So, scan ascendant nodes
@@ -847,18 +898,22 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
* to still alive ones.
*/
while (fn) {
- struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&table->tb6_lock));
- struct rt6_info *new_leaf;
+ struct fib6_info *new_leaf;
if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
new_leaf = fib6_find_prefix(net, table, fn);
- atomic_inc(&new_leaf->rt6i_ref);
+ atomic_inc(&new_leaf->fib6_ref);
+
rcu_assign_pointer(fn->leaf, new_leaf);
- rt6_release(rt);
+ fib6_info_release(rt);
}
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&table->tb6_lock));
}
+
+ if (rt->rt6i_pcpu)
+ fib6_drop_pcpu_from(rt, table);
}
}
@@ -866,15 +921,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
* Insert routing information in a node.
*/
-static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc,
+static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
+ struct nl_info *info,
struct netlink_ext_ack *extack)
{
- struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
- struct rt6_info *iter = NULL;
- struct rt6_info __rcu **ins;
- struct rt6_info __rcu **fallback_ins = NULL;
+ struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ struct fib6_info *iter = NULL;
+ struct fib6_info __rcu **ins;
+ struct fib6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh ||
@@ -891,12 +946,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
for (iter = leaf; iter;
iter = rcu_dereference_protected(iter->rt6_next,
- lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
+ lockdep_is_held(&rt->fib6_table->tb6_lock))) {
/*
* Search for duplicates
*/
- if (iter->rt6i_metric == rt->rt6i_metric) {
+ if (iter->fib6_metric == rt->fib6_metric) {
/*
* Same priority level
*/
@@ -916,15 +971,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
}
if (rt6_duplicate_nexthop(iter, rt)) {
- if (rt->rt6i_nsiblings)
- rt->rt6i_nsiblings = 0;
- if (!(iter->rt6i_flags & RTF_EXPIRES))
+ if (rt->fib6_nsiblings)
+ rt->fib6_nsiblings = 0;
+ if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
- if (!(rt->rt6i_flags & RTF_EXPIRES))
- rt6_clean_expires(iter);
+ if (!(rt->fib6_flags & RTF_EXPIRES))
+ fib6_clean_expires(iter);
else
- rt6_set_expires(iter, rt->dst.expires);
- iter->rt6i_pmtu = rt->rt6i_pmtu;
+ fib6_set_expires(iter, rt->expires);
+ fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
return -EEXIST;
}
/* If we have the same destination and the same metric,
@@ -940,10 +995,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
*/
if (rt_can_ecmp &&
rt6_qualify_for_ecmp(iter))
- rt->rt6i_nsiblings++;
+ rt->fib6_nsiblings++;
}
- if (iter->rt6i_metric > rt->rt6i_metric)
+ if (iter->fib6_metric > rt->fib6_metric)
break;
next_iter:
@@ -954,7 +1009,7 @@ next_iter:
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
found++;
}
@@ -963,34 +1018,34 @@ next_iter:
fn->rr_ptr = NULL;
/* Link this route to others same route. */
- if (rt->rt6i_nsiblings) {
- unsigned int rt6i_nsiblings;
- struct rt6_info *sibling, *temp_sibling;
+ if (rt->fib6_nsiblings) {
+ unsigned int fib6_nsiblings;
+ struct fib6_info *sibling, *temp_sibling;
/* Find the first route that have the same metric */
sibling = leaf;
while (sibling) {
- if (sibling->rt6i_metric == rt->rt6i_metric &&
+ if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->rt6i_siblings,
- &sibling->rt6i_siblings);
+ list_add_tail(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->rt6_next,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
* is broken!
*/
- rt6i_nsiblings = 0;
+ fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
- &rt->rt6i_siblings, rt6i_siblings) {
- sibling->rt6i_nsiblings++;
- BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
- rt6i_nsiblings++;
+ &rt->fib6_siblings, fib6_siblings) {
+ sibling->fib6_nsiblings++;
+ BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+ fib6_nsiblings++;
}
- BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
rt6_multipath_rebalance(temp_sibling);
}
@@ -1003,9 +1058,6 @@ next_iter:
add:
nlflags |= NLM_F_CREATE;
- err = fib6_commit_metrics(&rt->dst, mxc);
- if (err)
- return err;
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD,
@@ -1014,8 +1066,8 @@ add:
return err;
rcu_assign_pointer(rt->rt6_next, iter);
- atomic_inc(&rt->rt6i_ref);
- rcu_assign_pointer(rt->rt6i_node, fn);
+ atomic_inc(&rt->fib6_ref);
+ rcu_assign_pointer(rt->fib6_node, fn);
rcu_assign_pointer(*ins, rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
@@ -1036,18 +1088,14 @@ add:
return -ENOENT;
}
- err = fib6_commit_metrics(&rt->dst, mxc);
- if (err)
- return err;
-
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE,
rt, extack);
if (err)
return err;
- atomic_inc(&rt->rt6i_ref);
- rcu_assign_pointer(rt->rt6i_node, fn);
+ atomic_inc(&rt->fib6_ref);
+ rcu_assign_pointer(rt->fib6_node, fn);
rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
if (!info->skip_notify)
@@ -1056,35 +1104,35 @@ add:
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
- nsiblings = iter->rt6i_nsiblings;
- iter->rt6i_node = NULL;
+ nsiblings = iter->fib6_nsiblings;
+ iter->fib6_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
- rt6_release(iter);
+ fib6_info_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
ins = &rt->rt6_next;
iter = rcu_dereference_protected(*ins,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
- if (iter->rt6i_metric > rt->rt6i_metric)
+ if (iter->fib6_metric > rt->fib6_metric)
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->rt6_next;
- iter->rt6i_node = NULL;
+ iter->fib6_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
- rt6_release(iter);
+ fib6_info_release(iter);
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
ins = &iter->rt6_next;
}
iter = rcu_dereference_protected(*ins,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
WARN_ON(nsiblings != 0);
}
@@ -1093,10 +1141,10 @@ add:
return 0;
}
-static void fib6_start_gc(struct net *net, struct rt6_info *rt)
+static void fib6_start_gc(struct net *net, struct fib6_info *rt)
{
if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
- (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+ (rt->fib6_flags & RTF_EXPIRES))
mod_timer(&net->ipv6.ip6_fib_timer,
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
@@ -1108,22 +1156,22 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
-static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
int sernum)
{
- struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
/* paired with smp_rmb() in rt6_get_cookie_safe() */
smp_wmb();
while (fn) {
fn->fn_sernum = sernum;
fn = rcu_dereference_protected(fn->parent,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
}
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
{
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
}
@@ -1135,22 +1183,16 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
* Need to own table->tb6_lock
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc,
- struct netlink_ext_ack *extack)
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+ struct nl_info *info, struct netlink_ext_ack *extack)
{
- struct fib6_table *table = rt->rt6i_table;
+ struct fib6_table *table = rt->fib6_table;
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
int sernum = fib6_new_sernum(info->nl_net);
- if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
- return -EINVAL;
- if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
-
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
allow_create = 0;
@@ -1161,8 +1203,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
fn = fib6_add_1(info->nl_net, table, root,
- &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
- offsetof(struct rt6_info, rt6i_dst), allow_create,
+ &rt->fib6_dst.addr, rt->fib6_dst.plen,
+ offsetof(struct fib6_info, fib6_dst), allow_create,
replace_required, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
@@ -1173,7 +1215,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
pn = fn;
#ifdef CONFIG_IPV6_SUBTREES
- if (rt->rt6i_src.plen) {
+ if (rt->fib6_src.plen) {
struct fib6_node *sn;
if (!rcu_access_pointer(fn->subtree)) {
@@ -1194,16 +1236,16 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!sfn)
goto failure;
- atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+ atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
rcu_assign_pointer(sfn->leaf,
- info->nl_net->ipv6.ip6_null_entry);
+ info->nl_net->ipv6.fib6_null_entry);
sfn->fn_flags = RTN_ROOT;
/* Now add the first leaf node to new subtree */
sn = fib6_add_1(info->nl_net, table, sfn,
- &rt->rt6i_src.addr, rt->rt6i_src.plen,
- offsetof(struct rt6_info, rt6i_src),
+ &rt->fib6_src.addr, rt->fib6_src.plen,
+ offsetof(struct fib6_info, fib6_src),
allow_create, replace_required, extack);
if (IS_ERR(sn)) {
@@ -1221,8 +1263,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
rcu_assign_pointer(fn->subtree, sfn);
} else {
sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
- &rt->rt6i_src.addr, rt->rt6i_src.plen,
- offsetof(struct rt6_info, rt6i_src),
+ &rt->fib6_src.addr, rt->fib6_src.plen,
+ offsetof(struct fib6_info, fib6_src),
allow_create, replace_required, extack);
if (IS_ERR(sn)) {
@@ -1235,9 +1277,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (fn->fn_flags & RTN_TL_ROOT) {
/* put back null_entry for root node */
rcu_assign_pointer(fn->leaf,
- info->nl_net->ipv6.ip6_null_entry);
+ info->nl_net->ipv6.fib6_null_entry);
} else {
- atomic_inc(&rt->rt6i_ref);
+ atomic_inc(&rt->fib6_ref);
rcu_assign_pointer(fn->leaf, rt);
}
}
@@ -1245,7 +1287,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
#endif
- err = fib6_add_rt2node(fn, rt, info, mxc, extack);
+ err = fib6_add_rt2node(fn, rt, info, extack);
if (!err) {
__fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
@@ -1259,13 +1301,13 @@ out:
* super-tree leaf node we have to find a new one for it.
*/
if (pn != fn) {
- struct rt6_info *pn_leaf =
+ struct fib6_info *pn_leaf =
rcu_dereference_protected(pn->leaf,
lockdep_is_held(&table->tb6_lock));
if (pn_leaf == rt) {
pn_leaf = NULL;
RCU_INIT_POINTER(pn->leaf, NULL);
- atomic_dec(&rt->rt6i_ref);
+ fib6_info_release(rt);
}
if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
pn_leaf = fib6_find_prefix(info->nl_net, table,
@@ -1274,10 +1316,10 @@ out:
if (!pn_leaf) {
WARN_ON(!pn_leaf);
pn_leaf =
- info->nl_net->ipv6.ip6_null_entry;
+ info->nl_net->ipv6.fib6_null_entry;
}
#endif
- atomic_inc(&pn_leaf->rt6i_ref);
+ fib6_info_hold(pn_leaf);
rcu_assign_pointer(pn->leaf, pn_leaf);
}
}
@@ -1299,10 +1341,6 @@ failure:
(fn->fn_flags & RTN_TL_ROOT &&
!rcu_access_pointer(fn->leaf))))
fib6_repair_tree(info->nl_net, table, fn);
- /* Always release dst as dst->__refcnt is guaranteed
- * to be taken before entering this function
- */
- dst_release_immediate(&rt->dst);
return err;
}
@@ -1312,7 +1350,7 @@ failure:
*/
struct lookup_args {
- int offset; /* key offset on rt6_info */
+ int offset; /* key offset on fib6_info */
const struct in6_addr *addr; /* search key */
};
@@ -1350,7 +1388,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
struct fib6_node *subtree = FIB6_SUBTREE(fn);
if (subtree || fn->fn_flags & RTN_RTINFO) {
- struct rt6_info *leaf = rcu_dereference(fn->leaf);
+ struct fib6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
if (!leaf)
@@ -1390,12 +1428,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
struct fib6_node *fn;
struct lookup_args args[] = {
{
- .offset = offsetof(struct rt6_info, rt6i_dst),
+ .offset = offsetof(struct fib6_info, fib6_dst),
.addr = daddr,
},
#ifdef CONFIG_IPV6_SUBTREES
{
- .offset = offsetof(struct rt6_info, rt6i_src),
+ .offset = offsetof(struct fib6_info, fib6_src),
.addr = saddr,
},
#endif
@@ -1431,7 +1469,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
- struct rt6_info *leaf = rcu_dereference(fn->leaf);
+ struct fib6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
/* This node is being deleted */
@@ -1480,7 +1518,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
struct fib6_node *fn;
fn = fib6_locate_1(root, daddr, dst_len,
- offsetof(struct rt6_info, rt6i_dst),
+ offsetof(struct fib6_info, fib6_dst),
exact_match);
#ifdef CONFIG_IPV6_SUBTREES
@@ -1491,7 +1529,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
if (subtree) {
fn = fib6_locate_1(subtree, saddr, src_len,
- offsetof(struct rt6_info, rt6i_src),
+ offsetof(struct fib6_info, fib6_src),
exact_match);
}
}
@@ -1510,14 +1548,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
*
*/
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
struct fib6_table *table,
struct fib6_node *fn)
{
struct fib6_node *child_left, *child_right;
if (fn->fn_flags & RTN_ROOT)
- return net->ipv6.ip6_null_entry;
+ return net->ipv6.fib6_null_entry;
while (fn) {
child_left = rcu_dereference_protected(fn->left,
@@ -1554,7 +1592,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
/* Set fn->leaf to null_entry for root node. */
if (fn->fn_flags & RTN_TL_ROOT) {
- rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+ rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
return fn;
}
@@ -1569,11 +1607,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
lockdep_is_held(&table->tb6_lock));
struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
lockdep_is_held(&table->tb6_lock));
- struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+ struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&table->tb6_lock));
- struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
lockdep_is_held(&table->tb6_lock));
- struct rt6_info *new_fn_leaf;
+ struct fib6_info *new_fn_leaf;
RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
@@ -1599,10 +1637,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
#if RT6_DEBUG >= 2
if (!new_fn_leaf) {
WARN_ON(!new_fn_leaf);
- new_fn_leaf = net->ipv6.ip6_null_entry;
+ new_fn_leaf = net->ipv6.fib6_null_entry;
}
#endif
- atomic_inc(&new_fn_leaf->rt6i_ref);
+ fib6_info_hold(new_fn_leaf);
rcu_assign_pointer(fn->leaf, new_fn_leaf);
return pn;
}
@@ -1658,26 +1696,24 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
return pn;
RCU_INIT_POINTER(pn->leaf, NULL);
- rt6_release(pn_leaf);
+ fib6_info_release(pn_leaf);
fn = pn;
}
}
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
- struct rt6_info __rcu **rtp, struct nl_info *info)
+ struct fib6_info __rcu **rtp, struct nl_info *info)
{
struct fib6_walker *w;
- struct rt6_info *rt = rcu_dereference_protected(*rtp,
+ struct fib6_info *rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
RT6_TRACE("fib6_del_route\n");
- WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
-
/* Unlink it */
*rtp = rt->rt6_next;
- rt->rt6i_node = NULL;
+ rt->fib6_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1689,14 +1725,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fn->rr_ptr = NULL;
/* Remove this entry from other siblings */
- if (rt->rt6i_nsiblings) {
- struct rt6_info *sibling, *next_sibling;
+ if (rt->fib6_nsiblings) {
+ struct fib6_info *sibling, *next_sibling;
list_for_each_entry_safe(sibling, next_sibling,
- &rt->rt6i_siblings, rt6i_siblings)
- sibling->rt6i_nsiblings--;
- rt->rt6i_nsiblings = 0;
- list_del_init(&rt->rt6i_siblings);
+ &rt->fib6_siblings, fib6_siblings)
+ sibling->fib6_nsiblings--;
+ rt->fib6_nsiblings = 0;
+ list_del_init(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
@@ -1730,40 +1766,30 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
- rt6_release(rt);
+ fib6_info_release(rt);
}
/* Need to own table->tb6_lock */
-int fib6_del(struct rt6_info *rt, struct nl_info *info)
+int fib6_del(struct fib6_info *rt, struct nl_info *info)
{
- struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
- struct fib6_table *table = rt->rt6i_table;
+ struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ struct fib6_table *table = rt->fib6_table;
struct net *net = info->nl_net;
- struct rt6_info __rcu **rtp;
- struct rt6_info __rcu **rtp_next;
+ struct fib6_info __rcu **rtp;
+ struct fib6_info __rcu **rtp_next;
-#if RT6_DEBUG >= 2
- if (rt->dst.obsolete > 0) {
- WARN_ON(fn);
- return -ENOENT;
- }
-#endif
- if (!fn || rt == net->ipv6.ip6_null_entry)
+ if (!fn || rt == net->ipv6.fib6_null_entry)
return -ENOENT;
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- /* remove cached dst from exception table */
- if (rt->rt6i_flags & RTF_CACHE)
- return rt6_remove_exception_rt(rt);
-
/*
* Walk the leaf entries looking for ourself
*/
for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
- struct rt6_info *cur = rcu_dereference_protected(*rtp,
+ struct fib6_info *cur = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
if (rt == cur) {
fib6_del_route(table, fn, rtp, info);
@@ -1907,7 +1933,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w)
static int fib6_clean_node(struct fib6_walker *w)
{
int res;
- struct rt6_info *rt;
+ struct fib6_info *rt;
struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
struct nl_info info = {
.nl_net = c->net,
@@ -1932,17 +1958,17 @@ static int fib6_clean_node(struct fib6_walker *w)
#if RT6_DEBUG >= 2
pr_debug("%s: del failed: rt=%p@%p err=%d\n",
__func__, rt,
- rcu_access_pointer(rt->rt6i_node),
+ rcu_access_pointer(rt->fib6_node),
res);
#endif
continue;
}
return 0;
} else if (res == -2) {
- if (WARN_ON(!rt->rt6i_nsiblings))
+ if (WARN_ON(!rt->fib6_nsiblings))
continue;
- rt = list_last_entry(&rt->rt6i_siblings,
- struct rt6_info, rt6i_siblings);
+ rt = list_last_entry(&rt->fib6_siblings,
+ struct fib6_info, fib6_siblings);
continue;
}
WARN_ON(res != 0);
@@ -1961,7 +1987,7 @@ static int fib6_clean_node(struct fib6_walker *w)
*/
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
- int (*func)(struct rt6_info *, void *arg),
+ int (*func)(struct fib6_info *, void *arg),
int sernum, void *arg)
{
struct fib6_cleaner c;
@@ -1979,7 +2005,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
}
static void __fib6_clean_all(struct net *net,
- int (*func)(struct rt6_info *, void *),
+ int (*func)(struct fib6_info *, void *),
int sernum, void *arg)
{
struct fib6_table *table;
@@ -1999,7 +2025,7 @@ static void __fib6_clean_all(struct net *net,
rcu_read_unlock();
}
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
void *arg)
{
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
@@ -2016,7 +2042,7 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct rt6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, void *arg)
{
struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
@@ -2026,8 +2052,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
* Routes are expired even if they are in use.
*/
- if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
- if (time_after(now, rt->dst.expires)) {
+ if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
+ if (time_after(now, rt->expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
}
@@ -2110,7 +2136,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
- net->ipv6.ip6_null_entry);
+ net->ipv6.fib6_null_entry);
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2122,7 +2148,7 @@ static int __net_init fib6_net_init(struct net *net)
goto out_fib6_main_tbl;
net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
- net->ipv6.ip6_null_entry);
+ net->ipv6.fib6_null_entry);
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2220,25 +2246,26 @@ struct ipv6_route_iter {
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
- struct rt6_info *rt = v;
+ struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
+ const struct net_device *dev;
- seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+ seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
#ifdef CONFIG_IPV6_SUBTREES
- seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+ seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->rt6i_flags & RTF_GATEWAY)
- seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+ if (rt->fib6_flags & RTF_GATEWAY)
+ seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
else
seq_puts(seq, "00000000000000000000000000000000");
+ dev = rt->fib6_nh.nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
- rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, rt->rt6i_flags,
- rt->dst.dev ? rt->dst.dev->name : "");
+ rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
+ rt->fib6_flags, dev ? dev->name : "");
iter->w.leaf = NULL;
return 0;
}
@@ -2311,14 +2338,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
int r;
- struct rt6_info *n;
+ struct fib6_info *n;
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
if (!v)
goto iter_table;
- n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
+ n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next);
if (n) {
++*pos;
return n;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 69727bc168cb..04c69e0c84b3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -807,7 +807,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
}
/**
- * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6gre_tnl_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
* @hdr: IPv6 header from the incoming packet
*
@@ -896,6 +896,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct flowi6 fl6;
int err = -EINVAL;
__u32 mtu;
+ int nhoff;
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -908,6 +909,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
truncate = true;
}
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
if (skb_cow_head(skb, dev->needed_headroom))
goto tx_err;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9ee208a348f5..f08d34491ece 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -336,7 +336,7 @@ int ip6_mc_input(struct sk_buff *skb)
bool deliver;
__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
- ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
+ __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
skb->len);
hdr = ipv6_hdr(skb);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 4a87f9428ca5..5b3f2f89ef41 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -88,9 +88,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (skb->encapsulation &&
skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6))
- udpfrag = proto == IPPROTO_UDP && encap;
+ udpfrag = proto == IPPROTO_UDP && encap &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
else
- udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2e891d2c30ef..dfd8af41824e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -425,6 +425,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
int ip6_forward(struct sk_buff *skb)
{
+ struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
@@ -444,8 +445,7 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
- __IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -476,8 +476,7 @@ int ip6_forward(struct sk_buff *skb)
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
- __IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
@@ -490,15 +489,13 @@ int ip6_forward(struct sk_buff *skb)
if (proxied > 0)
return ip6_input(skb);
else if (proxied < 0) {
- __IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
- __IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
dst = skb_dst(skb);
@@ -554,8 +551,7 @@ int ip6_forward(struct sk_buff *skb)
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- __IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INTOOBIGERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
@@ -579,7 +575,7 @@ int ip6_forward(struct sk_buff *skb)
ip6_forward_finish);
error:
- __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -966,15 +962,21 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* that's why we try it again later.
*/
if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ struct fib6_info *from;
struct rt6_info *rt;
bool had_dst = *dst != NULL;
if (!had_dst)
*dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
- err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+
+ rcu_read_lock();
+ from = rt ? rcu_dereference(rt->from) : NULL;
+ err = ip6_route_get_saddr(net, from, &fl6->daddr,
sk ? inet6_sk(sk)->srcprefs : 0,
&fl6->saddr);
+ rcu_read_unlock();
+
if (err)
goto out_err_release;
@@ -1238,6 +1240,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (mtu < IPV6_MIN_MTU)
return -EINVAL;
cork->base.fragsize = mtu;
+ cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
+
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
@@ -1272,6 +1276,7 @@ static int __ip6_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
+ bool paged;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1279,7 +1284,8 @@ static int __ip6_append_data(struct sock *sk,
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
}
- mtu = cork->fragsize;
+ paged = !!cork->gso_size;
+ mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1327,7 +1333,7 @@ emsgsize:
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
headersize == sizeof(struct ipv6hdr) &&
length <= mtu - headersize &&
- !(flags & MSG_MORE) &&
+ (!(flags & MSG_MORE) || cork->gso_size) &&
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
@@ -1370,6 +1376,7 @@ emsgsize:
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1392,11 +1399,17 @@ alloc_new_skb:
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
+ fraglen = datalen + fragheaderlen;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
- alloclen = datalen + fragheaderlen;
+ else if (!paged)
+ alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += dst_exthdrlen;
@@ -1418,7 +1431,7 @@ alloc_new_skb:
*/
alloclen += sizeof(struct frag_hdr);
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
@@ -1457,7 +1470,7 @@ alloc_new_skb:
/*
* Find where to start putting bytes
*/
- data = skb_put(skb, fraglen);
+ data = skb_put(skb, fraglen - pagedlen);
skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
skb->transport_header = (skb->network_header +
@@ -1480,7 +1493,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
@@ -1753,9 +1766,9 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
void *from, int length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
+ struct inet_cork_full *cork,
const struct sockcm_cookie *sockc)
{
- struct inet_cork_full cork;
struct inet6_cork v6_cork;
struct sk_buff_head queue;
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
@@ -1766,27 +1779,27 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
__skb_queue_head_init(&queue);
- cork.base.flags = 0;
- cork.base.addr = 0;
- cork.base.opt = NULL;
- cork.base.dst = NULL;
+ cork->base.flags = 0;
+ cork->base.addr = 0;
+ cork->base.opt = NULL;
+ cork->base.dst = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
+ err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
if (err) {
- ip6_cork_release(&cork, &v6_cork);
+ ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
- err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
+ err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags, ipc6, sockc);
if (err) {
- __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
+ __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);
}
- return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
+ return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 298fd8b6ed17..20a419ee8000 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -180,7 +180,8 @@ static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
};
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh, struct nlattr **tb)
+ struct fib_rule_hdr *frh, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9de4dfb126ba..9ac5366064e3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1155,7 +1155,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
- struct rt6_info *rt = NULL;
+ struct fib6_info *rt = NULL;
+ struct net *net;
int lifetime;
struct ndisc_options ndopts;
int optlen;
@@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
+ net = dev_net(in6_dev->dev);
if (!in6_dev->cnf.accept_ra_from_local &&
- ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- in6_dev->dev, 0)) {
+ ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
skb->dev->name);
@@ -1272,20 +1273,22 @@ static void ndisc_router_discovery(struct sk_buff *skb)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
- rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
+ rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
- neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+ neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
+ rt->fib6_nh.nh_dev, NULL,
+ &ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
- ip6_rt_put(rt);
+ fib6_info_release(rt);
return;
}
}
if (rt && lifetime == 0) {
- ip6_del_rt(rt);
+ ip6_del_rt(net, rt);
rt = NULL;
}
@@ -1294,7 +1297,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (!rt && lifetime) {
ND_PRINTK(3, info, "RA: adding default router\n");
- rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
+ rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
+ skb->dev, pref);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
@@ -1302,28 +1306,29 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+ neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
+ rt->fib6_nh.nh_dev, NULL,
+ &ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
- ip6_rt_put(rt);
+ fib6_info_release(rt);
return;
}
neigh->flags |= NTF_ROUTER;
} else if (rt) {
- rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+ rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
}
if (rt)
- rt6_set_expires(rt, jiffies + (HZ * lifetime));
+ fib6_set_expires(rt, jiffies + (HZ * lifetime));
if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
- if (rt)
- dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
- ra_msg->icmph.icmp6_hop_limit);
+ fib6_metric_set(rt, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
} else {
ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
}
@@ -1475,10 +1480,7 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
in6_dev->cnf.mtu6 = mtu;
-
- if (rt)
- dst_metric_set(&rt->dst, RTAX_MTU, mtu);
-
+ fib6_metric_set(rt, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
}
@@ -1497,7 +1499,7 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
- ip6_rt_put(rt);
+ fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4979610287e2..b939b94e7e91 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -163,7 +163,8 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
}
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
- struct frag_hdr *fhdr, int nhoff)
+ struct frag_hdr *fhdr, int nhoff,
+ u32 *prob_offset)
{
struct sk_buff *prev, *next;
struct net_device *dev;
@@ -179,11 +180,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
- ((u8 *)&fhdr->frag_off -
- skb_network_header(skb)));
+ *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
return -1;
}
@@ -214,10 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
- offsetof(struct ipv6hdr, payload_len));
+ *prob_offset = offsetof(struct ipv6hdr, payload_len);
return -1;
}
if (end > fq->q.len) {
@@ -519,15 +513,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
iif = skb->dev ? skb->dev->ifindex : 0;
fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
+ u32 prob_offset = 0;
int ret;
spin_lock(&fq->q.lock);
fq->iif = iif;
- ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
+ &prob_offset);
spin_unlock(&fq->q.lock);
inet_frag_put(&fq->q);
+ if (prob_offset) {
+ __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
+ IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
+ }
return ret;
}
@@ -536,7 +537,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return -1;
fail_hdr:
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f4d61736c41a..8ed1b519c2b0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,7 +78,6 @@ enum rt6_nud_state {
RT6_NUD_SUCCEED = 1
};
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
@@ -97,25 +96,24 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
-static void rt6_dst_from_metrics_check(struct rt6_info *rt);
-static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
-static size_t rt6_nlmsg_size(struct rt6_info *rt);
-static int rt6_fill_node(struct net *net,
- struct sk_buff *skb, struct rt6_info *rt,
- struct in6_addr *dst, struct in6_addr *src,
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+ struct fib6_info *rt, struct dst_entry *dst,
+ struct in6_addr *dest, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
struct in6_addr *daddr,
struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref);
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
const struct in6_addr *gwaddr,
struct net_device *dev);
@@ -184,29 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
}
}
-static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
-{
- return dst_metrics_write_ptr(&rt->from->dst);
-}
-
-static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
- struct rt6_info *rt = (struct rt6_info *)dst;
-
- if (rt->rt6i_flags & RTF_PCPU)
- return rt6_pcpu_cow_metrics(rt);
- else if (rt->rt6i_flags & RTF_CACHE)
- return NULL;
- else
- return dst_cow_metrics_generic(dst, old);
-}
-
-static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+static inline const void *choose_neigh_daddr(const struct in6_addr *p,
struct sk_buff *skb,
const void *daddr)
{
- struct in6_addr *p = &rt->rt6i_gateway;
-
if (!ipv6_addr_any(p))
return (const void *) p;
else if (skb)
@@ -214,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt,
return daddr;
}
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+ struct net_device *dev,
+ struct sk_buff *skb,
+ const void *daddr)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
struct neighbour *n;
- daddr = choose_neigh_daddr(rt, skb, daddr);
- n = __ipv6_neigh_lookup(dst->dev, daddr);
+ daddr = choose_neigh_daddr(gw, skb, daddr);
+ n = __ipv6_neigh_lookup(dev, daddr);
if (n)
return n;
- return neigh_create(&nd_tbl, daddr, dst->dev);
+ return neigh_create(&nd_tbl, daddr, dev);
+}
+
+static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
+{
+ const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+
+ return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
}
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
@@ -233,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
struct net_device *dev = dst->dev;
struct rt6_info *rt = (struct rt6_info *)dst;
- daddr = choose_neigh_daddr(rt, NULL, daddr);
+ daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
if (!daddr)
return;
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -250,7 +238,7 @@ static struct dst_ops ip6_dst_ops_template = {
.check = ip6_dst_check,
.default_advmss = ip6_default_advmss,
.mtu = ip6_mtu,
- .cow_metrics = ipv6_cow_metrics,
+ .cow_metrics = dst_cow_metrics_generic,
.destroy = ip6_dst_destroy,
.ifdown = ip6_dst_ifdown,
.negative_advice = ip6_negative_advice,
@@ -258,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = {
.update_pmtu = ip6_rt_update_pmtu,
.redirect = rt6_do_redirect,
.local_out = __ip6_local_out,
- .neigh_lookup = ip6_neigh_lookup,
+ .neigh_lookup = ip6_dst_neigh_lookup,
.confirm_neigh = ip6_confirm_neigh,
};
@@ -288,13 +276,22 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.redirect = ip6_rt_blackhole_redirect,
.cow_metrics = dst_cow_metrics_generic,
- .neigh_lookup = ip6_neigh_lookup,
+ .neigh_lookup = ip6_dst_neigh_lookup,
};
static const u32 ip6_template_metrics[RTAX_MAX] = {
[RTAX_HOPLIMIT - 1] = 0,
};
+static const struct fib6_info fib6_null_entry_template = {
+ .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
+ .fib6_protocol = RTPROT_KERNEL,
+ .fib6_metric = ~(u32)0,
+ .fib6_ref = ATOMIC_INIT(1),
+ .fib6_type = RTN_UNREACHABLE,
+ .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
+};
+
static const struct rt6_info ip6_null_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
@@ -305,9 +302,6 @@ static const struct rt6_info ip6_null_entry_template = {
.output = ip6_pkt_discard_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
- .rt6i_protocol = RTPROT_KERNEL,
- .rt6i_metric = ~(u32) 0,
- .rt6i_ref = ATOMIC_INIT(1),
};
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -322,9 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = {
.output = ip6_pkt_prohibit_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
- .rt6i_protocol = RTPROT_KERNEL,
- .rt6i_metric = ~(u32) 0,
- .rt6i_ref = ATOMIC_INIT(1),
};
static const struct rt6_info ip6_blk_hole_entry_template = {
@@ -337,9 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.output = dst_discard_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
- .rt6i_protocol = RTPROT_KERNEL,
- .rt6i_metric = ~(u32) 0,
- .rt6i_ref = ATOMIC_INIT(1),
};
#endif
@@ -349,14 +337,12 @@ static void rt6_info_init(struct rt6_info *rt)
struct dst_entry *dst = &rt->dst;
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- INIT_LIST_HEAD(&rt->rt6i_siblings);
INIT_LIST_HEAD(&rt->rt6i_uncached);
}
/* allocate dst with ip6_dst_ops */
-static struct rt6_info *__ip6_dst_alloc(struct net *net,
- struct net_device *dev,
- int flags)
+struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
+ int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
1, DST_OBSOLETE_FORCE_CHK, flags);
@@ -368,34 +354,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
return rt;
}
-
-struct rt6_info *ip6_dst_alloc(struct net *net,
- struct net_device *dev,
- int flags)
-{
- struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
-
- if (rt) {
- rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
- if (!rt->rt6i_pcpu) {
- dst_release_immediate(&rt->dst);
- return NULL;
- }
- }
-
- return rt;
-}
EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
- struct rt6_exception_bucket *bucket;
- struct rt6_info *from = rt->from;
+ struct fib6_info *from;
struct inet6_dev *idev;
dst_destroy_metrics_generic(dst);
- free_percpu(rt->rt6i_pcpu);
rt6_uncached_list_del(rt);
idev = rt->rt6i_idev;
@@ -403,14 +370,12 @@ static void ip6_dst_destroy(struct dst_entry *dst)
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
- if (bucket) {
- rt->rt6i_exception_bucket = NULL;
- kfree(bucket);
- }
- rt->from = NULL;
- dst_release(&from->dst);
+ rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ rcu_assign_pointer(rt->from, NULL);
+ fib6_info_release(from);
+ rcu_read_unlock();
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -440,23 +405,27 @@ static bool __rt6_check_expired(const struct rt6_info *rt)
static bool rt6_check_expired(const struct rt6_info *rt)
{
+ struct fib6_info *from;
+
+ from = rcu_dereference(rt->from);
+
if (rt->rt6i_flags & RTF_EXPIRES) {
if (time_after(jiffies, rt->dst.expires))
return true;
- } else if (rt->from) {
+ } else if (from) {
return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
- rt6_check_expired(rt->from);
+ fib6_check_expired(from);
}
return false;
}
-static struct rt6_info *rt6_multipath_select(const struct net *net,
- struct rt6_info *match,
+static struct fib6_info *rt6_multipath_select(const struct net *net,
+ struct fib6_info *match,
struct flowi6 *fl6, int oif,
const struct sk_buff *skb,
int strict)
{
- struct rt6_info *sibling, *next_sibling;
+ struct fib6_info *sibling, *next_sibling;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
@@ -464,12 +433,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+ if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
return match;
- list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
- rt6i_siblings) {
- if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+ list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
+ fib6_siblings) {
+ int nh_upper_bound;
+
+ nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
+ if (fl6->mp_hash > nh_upper_bound)
continue;
if (rt6_score_route(sibling, oif, strict) < 0)
break;
@@ -484,38 +456,27 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
* Route lookup. rcu_read_lock() should be held.
*/
-static inline struct rt6_info *rt6_device_match(struct net *net,
- struct rt6_info *rt,
+static inline struct fib6_info *rt6_device_match(struct net *net,
+ struct fib6_info *rt,
const struct in6_addr *saddr,
int oif,
int flags)
{
- struct rt6_info *local = NULL;
- struct rt6_info *sprt;
+ struct fib6_info *sprt;
- if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
+ if (!oif && ipv6_addr_any(saddr) &&
+ !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
return rt;
for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
- struct net_device *dev = sprt->dst.dev;
+ const struct net_device *dev = sprt->fib6_nh.nh_dev;
- if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
+ if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
continue;
if (oif) {
if (dev->ifindex == oif)
return sprt;
- if (dev->flags & IFF_LOOPBACK) {
- if (!sprt->rt6i_idev ||
- sprt->rt6i_idev->dev->ifindex != oif) {
- if (flags & RT6_LOOKUP_F_IFACE)
- continue;
- if (local &&
- local->rt6i_idev->dev->ifindex == oif)
- continue;
- }
- local = sprt;
- }
} else {
if (ipv6_chk_addr(net, saddr, dev,
flags & RT6_LOOKUP_F_IFACE))
@@ -523,15 +484,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
}
}
- if (oif) {
- if (local)
- return local;
-
- if (flags & RT6_LOOKUP_F_IFACE)
- return net->ipv6.ip6_null_entry;
- }
+ if (oif && flags & RT6_LOOKUP_F_IFACE)
+ return net->ipv6.fib6_null_entry;
- return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
+ return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -553,10 +509,13 @@ static void rt6_probe_deferred(struct work_struct *w)
kfree(work);
}
-static void rt6_probe(struct rt6_info *rt)
+static void rt6_probe(struct fib6_info *rt)
{
struct __rt6_probe_work *work;
+ const struct in6_addr *nh_gw;
struct neighbour *neigh;
+ struct net_device *dev;
+
/*
* Okay, this does not seem to be appropriate
* for now, however, we need to check if it
@@ -565,20 +524,25 @@ static void rt6_probe(struct rt6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
+ if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
return;
+
+ nh_gw = &rt->fib6_nh.nh_gw;
+ dev = rt->fib6_nh.nh_dev;
rcu_read_lock_bh();
- neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
+ struct inet6_dev *idev;
+
if (neigh->nud_state & NUD_VALID)
goto out;
+ idev = __in6_dev_get(dev);
work = NULL;
write_lock(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
- neigh->updated +
- rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ neigh->updated + idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work)
__neigh_set_probe_once(neigh);
@@ -590,9 +554,9 @@ static void rt6_probe(struct rt6_info *rt)
if (work) {
INIT_WORK(&work->work, rt6_probe_deferred);
- work->target = rt->rt6i_gateway;
- dev_hold(rt->dst.dev);
- work->dev = rt->dst.dev;
+ work->target = *nh_gw;
+ dev_hold(dev);
+ work->dev = dev;
schedule_work(&work->work);
}
@@ -600,7 +564,7 @@ out:
rcu_read_unlock_bh();
}
#else
-static inline void rt6_probe(struct rt6_info *rt)
+static inline void rt6_probe(struct fib6_info *rt)
{
}
#endif
@@ -608,28 +572,27 @@ static inline void rt6_probe(struct rt6_info *rt)
/*
* Default Router Selection (RFC 2461 6.3.6)
*/
-static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+static inline int rt6_check_dev(struct fib6_info *rt, int oif)
{
- struct net_device *dev = rt->dst.dev;
+ const struct net_device *dev = rt->fib6_nh.nh_dev;
+
if (!oif || dev->ifindex == oif)
return 2;
- if ((dev->flags & IFF_LOOPBACK) &&
- rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
- return 1;
return 0;
}
-static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
{
- struct neighbour *neigh;
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
+ struct neighbour *neigh;
- if (rt->rt6i_flags & RTF_NONEXTHOP ||
- !(rt->rt6i_flags & RTF_GATEWAY))
+ if (rt->fib6_flags & RTF_NONEXTHOP ||
+ !(rt->fib6_flags & RTF_GATEWAY))
return RT6_NUD_SUCCEED;
rcu_read_lock_bh();
- neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+ neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
+ &rt->fib6_nh.nh_gw);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
@@ -650,8 +613,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
return ret;
}
-static int rt6_score_route(struct rt6_info *rt, int oif,
- int strict)
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
{
int m;
@@ -659,7 +621,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
if (!m && (strict & RT6_LOOKUP_F_IFACE))
return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
- m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
+ m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
#endif
if (strict & RT6_LOOKUP_F_REACHABLE) {
int n = rt6_check_neigh(rt);
@@ -669,23 +631,37 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
return m;
}
-static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
- int *mpri, struct rt6_info *match,
+/* called with rc_read_lock held */
+static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
+{
+ const struct net_device *dev = fib6_info_nh_dev(f6i);
+ bool rc = false;
+
+ if (dev) {
+ const struct inet6_dev *idev = __in6_dev_get(dev);
+
+ rc = !!idev->cnf.ignore_routes_with_linkdown;
+ }
+
+ return rc;
+}
+
+static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
+ int *mpri, struct fib6_info *match,
bool *do_rr)
{
int m;
bool match_do_rr = false;
- struct inet6_dev *idev = rt->rt6i_idev;
- if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
goto out;
- if (idev->cnf.ignore_routes_with_linkdown &&
- rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+ if (fib6_ignore_linkdown(rt) &&
+ rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
- if (rt6_check_expired(rt))
+ if (fib6_check_expired(rt))
goto out;
m = rt6_score_route(rt, oif, strict);
@@ -709,19 +685,19 @@ out:
return match;
}
-static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
- struct rt6_info *leaf,
- struct rt6_info *rr_head,
+static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
+ struct fib6_info *leaf,
+ struct fib6_info *rr_head,
u32 metric, int oif, int strict,
bool *do_rr)
{
- struct rt6_info *rt, *match, *cont;
+ struct fib6_info *rt, *match, *cont;
int mpri = -1;
match = NULL;
cont = NULL;
for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
- if (rt->rt6i_metric != metric) {
+ if (rt->fib6_metric != metric) {
cont = rt;
break;
}
@@ -731,7 +707,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
for (rt = leaf; rt && rt != rr_head;
rt = rcu_dereference(rt->rt6_next)) {
- if (rt->rt6i_metric != metric) {
+ if (rt->fib6_metric != metric) {
cont = rt;
break;
}
@@ -748,16 +724,16 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
return match;
}
-static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
int oif, int strict)
{
- struct rt6_info *leaf = rcu_dereference(fn->leaf);
- struct rt6_info *match, *rt0;
+ struct fib6_info *leaf = rcu_dereference(fn->leaf);
+ struct fib6_info *match, *rt0;
bool do_rr = false;
int key_plen;
- if (!leaf || leaf == net->ipv6.ip6_null_entry)
- return net->ipv6.ip6_null_entry;
+ if (!leaf || leaf == net->ipv6.fib6_null_entry)
+ return net->ipv6.fib6_null_entry;
rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
@@ -768,39 +744,39 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
* (This might happen if all routes under fn are deleted from
* the tree and fib6_repair_tree() is called on the node.)
*/
- key_plen = rt0->rt6i_dst.plen;
+ key_plen = rt0->fib6_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
- if (rt0->rt6i_src.plen)
- key_plen = rt0->rt6i_src.plen;
+ if (rt0->fib6_src.plen)
+ key_plen = rt0->fib6_src.plen;
#endif
if (fn->fn_bit != key_plen)
- return net->ipv6.ip6_null_entry;
+ return net->ipv6.fib6_null_entry;
- match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
+ match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rcu_dereference(rt0->rt6_next);
+ struct fib6_info *next = rcu_dereference(rt0->rt6_next);
/* no entries matched; do round-robin */
- if (!next || next->rt6i_metric != rt0->rt6i_metric)
+ if (!next || next->fib6_metric != rt0->fib6_metric)
next = leaf;
if (next != rt0) {
- spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+ spin_lock_bh(&leaf->fib6_table->tb6_lock);
/* make sure next is not being deleted from the tree */
- if (next->rt6i_node)
+ if (next->fib6_node)
rcu_assign_pointer(fn->rr_ptr, next);
- spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+ spin_unlock_bh(&leaf->fib6_table->tb6_lock);
}
}
- return match ? match : net->ipv6.ip6_null_entry;
+ return match ? match : net->ipv6.fib6_null_entry;
}
-static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
{
- return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+ return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -812,7 +788,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct in6_addr prefix_buf, *prefix;
unsigned int pref;
unsigned long lifetime;
- struct rt6_info *rt;
+ struct fib6_info *rt;
if (len < sizeof(struct route_info)) {
return -EINVAL;
@@ -850,13 +826,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
if (rinfo->prefix_len == 0)
- rt = rt6_get_dflt_router(gwaddr, dev);
+ rt = rt6_get_dflt_router(net, gwaddr, dev);
else
rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
gwaddr, dev);
if (rt && !lifetime) {
- ip6_del_rt(rt);
+ ip6_del_rt(net, rt);
rt = NULL;
}
@@ -864,21 +840,162 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
dev, pref);
else if (rt)
- rt->rt6i_flags = RTF_ROUTEINFO |
- (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+ rt->fib6_flags = RTF_ROUTEINFO |
+ (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
if (!addrconf_finite_timeout(lifetime))
- rt6_clean_expires(rt);
+ fib6_clean_expires(rt);
else
- rt6_set_expires(rt, jiffies + HZ * lifetime);
+ fib6_set_expires(rt, jiffies + HZ * lifetime);
- ip6_rt_put(rt);
+ fib6_info_release(rt);
}
return 0;
}
#endif
+/*
+ * Misc support functions
+ */
+
+/* called with rcu_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
+{
+ struct net_device *dev = rt->fib6_nh.nh_dev;
+
+ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+ /* for copies of local routes, dst->dev needs to be the
+ * device if it is a master device, the master device if
+ * device is enslaved, and the loopback as the default
+ */
+ if (netif_is_l3_slave(dev) &&
+ !rt6_need_strict(&rt->fib6_dst.addr))
+ dev = l3mdev_master_dev_rcu(dev);
+ else if (!netif_is_l3_master(dev))
+ dev = dev_net(dev)->loopback_dev;
+ /* last case is netif_is_l3_master(dev) is true in which
+ * case we want dev returned to be dev
+ */
+ }
+
+ return dev;
+}
+
+static const int fib6_prop[RTN_MAX + 1] = {
+ [RTN_UNSPEC] = 0,
+ [RTN_UNICAST] = 0,
+ [RTN_LOCAL] = 0,
+ [RTN_BROADCAST] = 0,
+ [RTN_ANYCAST] = 0,
+ [RTN_MULTICAST] = 0,
+ [RTN_BLACKHOLE] = -EINVAL,
+ [RTN_UNREACHABLE] = -EHOSTUNREACH,
+ [RTN_PROHIBIT] = -EACCES,
+ [RTN_THROW] = -EAGAIN,
+ [RTN_NAT] = -EINVAL,
+ [RTN_XRESOLVE] = -EINVAL,
+};
+
+static int ip6_rt_type_to_error(u8 fib6_type)
+{
+ return fib6_prop[fib6_type];
+}
+
+static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
+{
+ unsigned short flags = 0;
+
+ if (rt->dst_nocount)
+ flags |= DST_NOCOUNT;
+ if (rt->dst_nopolicy)
+ flags |= DST_NOPOLICY;
+ if (rt->dst_host)
+ flags |= DST_HOST;
+
+ return flags;
+}
+
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
+{
+ rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+
+ switch (ort->fib6_type) {
+ case RTN_BLACKHOLE:
+ rt->dst.output = dst_discard_out;
+ rt->dst.input = dst_discard;
+ break;
+ case RTN_PROHIBIT:
+ rt->dst.output = ip6_pkt_prohibit_out;
+ rt->dst.input = ip6_pkt_prohibit;
+ break;
+ case RTN_THROW:
+ case RTN_UNREACHABLE:
+ default:
+ rt->dst.output = ip6_pkt_discard_out;
+ rt->dst.input = ip6_pkt_discard;
+ break;
+ }
+}
+
+static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
+{
+ rt->dst.flags |= fib6_info_dst_flags(ort);
+
+ if (ort->fib6_flags & RTF_REJECT) {
+ ip6_rt_init_dst_reject(rt, ort);
+ return;
+ }
+
+ rt->dst.error = 0;
+ rt->dst.output = ip6_output;
+
+ if (ort->fib6_type == RTN_LOCAL) {
+ rt->dst.input = ip6_input;
+ } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
+ rt->dst.input = ip6_mc_input;
+ } else {
+ rt->dst.input = ip6_forward;
+ }
+
+ if (ort->fib6_nh.nh_lwtstate) {
+ rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+ lwtunnel_set_redirect(&rt->dst);
+ }
+
+ rt->dst.lastuse = jiffies;
+}
+
+static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
+{
+ rt->rt6i_flags &= ~RTF_EXPIRES;
+ fib6_info_hold(from);
+ rcu_assign_pointer(rt->from, from);
+ dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
+ if (from->fib6_metrics != &dst_default_metrics) {
+ rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ refcount_inc(&from->fib6_metrics->refcnt);
+ }
+}
+
+static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
+{
+ struct net_device *dev = fib6_info_nh_dev(ort);
+
+ ip6_rt_init_dst(rt, ort);
+
+ rt->rt6i_dst = ort->fib6_dst;
+ rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
+ rt->rt6i_gateway = ort->fib6_nh.nh_gw;
+ rt->rt6i_flags = ort->fib6_flags;
+ rt6_set_from(rt, ort);
+#ifdef CONFIG_IPV6_SUBTREES
+ rt->rt6i_src = ort->fib6_src;
+#endif
+ rt->rt6i_prefsrc = ort->fib6_prefsrc;
+ rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+}
+
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
struct in6_addr *saddr)
{
@@ -914,14 +1031,29 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
return false;
}
+/* called with rcu_lock held */
+static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
+{
+ unsigned short flags = fib6_info_dst_flags(rt);
+ struct net_device *dev = rt->fib6_nh.nh_dev;
+ struct rt6_info *nrt;
+
+ nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
+ if (nrt)
+ ip6_rt_copy_init(nrt, rt);
+
+ return nrt;
+}
+
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
int flags)
{
- struct rt6_info *rt, *rt_cache;
+ struct fib6_info *f6i;
struct fib6_node *fn;
+ struct rt6_info *rt;
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
flags &= ~RT6_LOOKUP_F_IFACE;
@@ -929,35 +1061,43 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- rt = rcu_dereference(fn->leaf);
- if (!rt) {
- rt = net->ipv6.ip6_null_entry;
+ f6i = rcu_dereference(fn->leaf);
+ if (!f6i) {
+ f6i = net->ipv6.fib6_null_entry;
} else {
- rt = rt6_device_match(net, rt, &fl6->saddr,
+ f6i = rt6_device_match(net, f6i, &fl6->saddr,
fl6->flowi6_oif, flags);
- if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
- skb, flags);
+ if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
+ f6i = rt6_multipath_select(net, f6i, fl6,
+ fl6->flowi6_oif, skb, flags);
}
- if (rt == net->ipv6.ip6_null_entry) {
+ if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
}
- /* Search through exception table */
- rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
- if (rt_cache)
- rt = rt_cache;
- if (ip6_hold_safe(net, &rt, true))
- dst_use_noref(&rt->dst, jiffies);
+ /* Search through exception table */
+ rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ if (rt) {
+ if (ip6_hold_safe(net, &rt, true))
+ dst_use_noref(&rt->dst, jiffies);
+ } else if (f6i == net->ipv6.fib6_null_entry) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ } else {
+ rt = ip6_create_rt_rcu(f6i);
+ if (!rt) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+ }
rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
-
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
@@ -999,55 +1139,28 @@ EXPORT_SYMBOL(rt6_lookup);
* Caller must hold dst before calling it.
*/
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
- struct mx6_config *mxc,
+static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
struct netlink_ext_ack *extack)
{
int err;
struct fib6_table *table;
- table = rt->rt6i_table;
+ table = rt->fib6_table;
spin_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
+ err = fib6_add(&table->tb6_root, rt, info, extack);
spin_unlock_bh(&table->tb6_lock);
return err;
}
-int ip6_ins_rt(struct rt6_info *rt)
+int ip6_ins_rt(struct net *net, struct fib6_info *rt)
{
- struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
- struct mx6_config mxc = { .mx = NULL, };
-
- /* Hold dst to account for the reference from the fib6 tree */
- dst_hold(&rt->dst);
- return __ip6_ins_rt(rt, &info, &mxc, NULL);
-}
-
-/* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
-{
- struct net_device *dev = rt->dst.dev;
-
- if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
- /* for copies of local routes, dst->dev needs to be the
- * device if it is a master device, the master device if
- * device is enslaved, and the loopback as the default
- */
- if (netif_is_l3_slave(dev) &&
- !rt6_need_strict(&rt->rt6i_dst.addr))
- dev = l3mdev_master_dev_rcu(dev);
- else if (!netif_is_l3_master(dev))
- dev = dev_net(dev)->loopback_dev;
- /* last case is netif_is_l3_master(dev) is true in which
- * case we want dev returned to be dev
- */
- }
+ struct nl_info info = { .nl_net = net, };
- return dev;
+ return __ip6_ins_rt(rt, &info, NULL);
}
-static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
@@ -1058,26 +1171,20 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
* Clone the route.
*/
- if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = ort->from;
-
- rcu_read_lock();
dev = ip6_rt_get_dev_rcu(ort);
- rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
- rcu_read_unlock();
+ rt = ip6_dst_alloc(dev_net(dev), dev, 0);
if (!rt)
return NULL;
ip6_rt_copy_init(rt, ort);
rt->rt6i_flags |= RTF_CACHE;
- rt->rt6i_metric = 0;
rt->dst.flags |= DST_HOST;
rt->rt6i_dst.addr = *daddr;
rt->rt6i_dst.plen = 128;
if (!rt6_is_gw_or_nonexthop(ort)) {
- if (ort->rt6i_dst.plen != 128 &&
- ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
+ if (ort->fib6_dst.plen != 128 &&
+ ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
@@ -1090,45 +1197,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
return rt;
}
-static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
{
+ unsigned short flags = fib6_info_dst_flags(rt);
struct net_device *dev;
struct rt6_info *pcpu_rt;
rcu_read_lock();
dev = ip6_rt_get_dev_rcu(rt);
- pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+ pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
rcu_read_unlock();
if (!pcpu_rt)
return NULL;
ip6_rt_copy_init(pcpu_rt, rt);
- pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
pcpu_rt->rt6i_flags |= RTF_PCPU;
return pcpu_rt;
}
/* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
{
struct rt6_info *pcpu_rt, **p;
p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p;
- if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
- rt6_dst_from_metrics_check(pcpu_rt);
+ if (pcpu_rt)
+ ip6_hold_safe(NULL, &pcpu_rt, false);
return pcpu_rt;
}
-static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_make_pcpu_route(struct net *net,
+ struct fib6_info *rt)
{
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt);
if (!pcpu_rt) {
- struct net *net = dev_net(rt->dst.dev);
-
dst_hold(&net->ipv6.ip6_null_entry->dst);
return net->ipv6.ip6_null_entry;
}
@@ -1138,7 +1244,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
- rt6_dst_from_metrics_check(pcpu_rt);
return pcpu_rt;
}
@@ -1158,9 +1263,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
return;
net = dev_net(rt6_ex->rt6i->dst.dev);
- rt6_ex->rt6i->rt6i_node = NULL;
hlist_del_rcu(&rt6_ex->hlist);
- rt6_release(rt6_ex->rt6i);
+ dst_release(&rt6_ex->rt6i->dst);
kfree_rcu(rt6_ex, rcu);
WARN_ON_ONCE(!bucket->depth);
bucket->depth--;
@@ -1268,20 +1372,36 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
return NULL;
}
+static unsigned int fib6_mtu(const struct fib6_info *rt)
+{
+ unsigned int mtu;
+
+ if (rt->fib6_pmtu) {
+ mtu = rt->fib6_pmtu;
+ } else {
+ struct net_device *dev = fib6_info_nh_dev(rt);
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+ }
+
+ mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
+}
+
static int rt6_insert_exception(struct rt6_info *nrt,
- struct rt6_info *ort)
+ struct fib6_info *ort)
{
- struct net *net = dev_net(ort->dst.dev);
+ struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
int err = 0;
- /* ort can't be a cache or pcpu route */
- if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = ort->from;
- WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
-
spin_lock_bh(&rt6_exception_lock);
if (ort->exception_bucket_flushed) {
@@ -1308,19 +1428,19 @@ static int rt6_insert_exception(struct rt6_info *nrt,
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (ort->rt6i_src.plen)
+ if (ort->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
/* Update rt6i_prefsrc as it could be changed
* in rt6_remove_prefsrc()
*/
- nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ nrt->rt6i_prefsrc = ort->fib6_prefsrc;
/* rt6_mtu_change() might lower mtu on ort.
* Only insert this exception route if its mtu
* is less than ort's mtu value.
*/
- if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+ if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
err = -EINVAL;
goto out;
}
@@ -1337,8 +1457,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
}
rt6_ex->rt6i = nrt;
rt6_ex->stamp = jiffies;
- atomic_inc(&nrt->rt6i_ref);
- nrt->rt6i_node = ort->rt6i_node;
hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
bucket->depth++;
net->ipv6.rt6_stats->fib_rt_cache++;
@@ -1351,16 +1469,16 @@ out:
/* Update fn->fn_sernum to invalidate all cached dst */
if (!err) {
- spin_lock_bh(&ort->rt6i_table->tb6_lock);
- fib6_update_sernum(ort);
- spin_unlock_bh(&ort->rt6i_table->tb6_lock);
+ spin_lock_bh(&ort->fib6_table->tb6_lock);
+ fib6_update_sernum(net, ort);
+ spin_unlock_bh(&ort->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
return err;
}
-void rt6_flush_exceptions(struct rt6_info *rt)
+void rt6_flush_exceptions(struct fib6_info *rt)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1390,7 +1508,7 @@ out:
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
struct in6_addr *daddr,
struct in6_addr *saddr)
{
@@ -1408,7 +1526,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (rt->rt6i_src.plen)
+ if (rt->fib6_src.plen)
src_key = saddr;
#endif
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
@@ -1420,14 +1538,15 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
}
/* Remove the passed in cached rt from the hash table that contains it */
-int rt6_remove_exception_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
{
struct rt6_exception_bucket *bucket;
- struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
+ struct fib6_info *from;
int err;
+ from = rcu_dereference(rt->from);
if (!from ||
!(rt->rt6i_flags & RTF_CACHE))
return -EINVAL;
@@ -1445,7 +1564,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->rt6i_src.plen)
+ if (from->fib6_src.plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1468,7 +1587,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct rt6_exception_bucket *bucket;
- struct rt6_info *from = rt->from;
+ struct fib6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
@@ -1486,7 +1605,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->rt6i_src.plen)
+ if (from->fib6_src.plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_rcu(&bucket,
@@ -1498,7 +1617,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
rcu_read_unlock();
}
-static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1540,7 +1659,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
}
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
- struct rt6_info *rt, int mtu)
+ struct fib6_info *rt, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1557,12 +1676,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
struct rt6_info *entry = rt6_ex->rt6i;
/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
- * route), the metrics of its rt->dst.from have already
+ * route), the metrics of its rt->from have already
* been updated.
*/
- if (entry->rt6i_pmtu &&
+ if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
rt6_mtu_change_route_allowed(idev, entry, mtu))
- entry->rt6i_pmtu = mtu;
+ dst_metric_set(&entry->dst, RTAX_MTU, mtu);
}
bucket++;
}
@@ -1570,7 +1689,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
-static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
struct in6_addr *gateway)
{
struct rt6_exception_bucket *bucket;
@@ -1649,7 +1768,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
gc_args->more++;
}
-void rt6_age_exceptions(struct rt6_info *rt,
+void rt6_age_exceptions(struct fib6_info *rt,
struct fib6_gc_args *gc_args,
unsigned long now)
{
@@ -1685,7 +1804,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt, *rt_cache;
+ struct fib6_info *f6i;
+ struct rt6_info *rt;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1702,10 +1822,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- rt = rt6_select(net, fn, oif, strict);
- if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
- if (rt == net->ipv6.ip6_null_entry) {
+ f6i = rt6_select(net, fn, oif, strict);
+ if (f6i->fib6_nsiblings)
+ f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
+ if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto redo_rt6_select;
@@ -1717,45 +1837,35 @@ redo_rt6_select:
}
}
- /*Search through exception table */
- rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
- if (rt_cache)
- rt = rt_cache;
-
- if (rt == net->ipv6.ip6_null_entry) {
+ if (f6i == net->ipv6.fib6_null_entry) {
+ rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
dst_hold(&rt->dst);
trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
- } else if (rt->rt6i_flags & RTF_CACHE) {
- if (ip6_hold_safe(net, &rt, true)) {
+ }
+
+ /*Search through exception table */
+ rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ if (rt) {
+ if (ip6_hold_safe(net, &rt, true))
dst_use_noref(&rt->dst, jiffies);
- rt6_dst_from_metrics_check(rt);
- }
+
rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
- !(rt->rt6i_flags & RTF_GATEWAY))) {
+ !(f6i->fib6_flags & RTF_GATEWAY))) {
/* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different
* from the fl6->daddr used to look-up route here.
*/
-
struct rt6_info *uncached_rt;
- if (ip6_hold_safe(net, &rt, true)) {
- dst_use_noref(&rt->dst, jiffies);
- } else {
- rcu_read_unlock();
- uncached_rt = rt;
- goto uncached_rt_out;
- }
- rcu_read_unlock();
+ uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
- uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
- dst_release(&rt->dst);
+ rcu_read_unlock();
if (uncached_rt) {
/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
@@ -1768,7 +1878,6 @@ redo_rt6_select:
dst_hold(&uncached_rt->dst);
}
-uncached_rt_out:
trace_fib6_table_lookup(net, uncached_rt, table, fl6);
return uncached_rt;
@@ -1777,24 +1886,12 @@ uncached_rt_out:
struct rt6_info *pcpu_rt;
- dst_use_noref(&rt->dst, jiffies);
local_bh_disable();
- pcpu_rt = rt6_get_pcpu_route(rt);
-
- if (!pcpu_rt) {
- /* atomic_inc_not_zero() is needed when using rcu */
- if (atomic_inc_not_zero(&rt->rt6i_ref)) {
- /* No dst_hold() on rt is needed because grabbing
- * rt->rt6i_ref makes sure rt can't be released.
- */
- pcpu_rt = rt6_make_pcpu_route(rt);
- rt6_release(rt);
- } else {
- /* rt is already removed from tree */
- pcpu_rt = net->ipv6.ip6_null_entry;
- dst_hold(&pcpu_rt->dst);
- }
- }
+ pcpu_rt = rt6_get_pcpu_route(f6i);
+
+ if (!pcpu_rt)
+ pcpu_rt = rt6_make_pcpu_route(net, f6i);
+
local_bh_enable();
rcu_read_unlock();
trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
@@ -2020,7 +2117,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt->rt6i_idev = in6_dev_get(loopback_dev);
rt->rt6i_gateway = ort->rt6i_gateway;
rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
- rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
@@ -2036,18 +2132,27 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
* Destination cache support functions
*/
-static void rt6_dst_from_metrics_check(struct rt6_info *rt)
+static bool fib6_check(struct fib6_info *f6i, u32 cookie)
{
- if (rt->from &&
- dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
- dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
+ u32 rt_cookie = 0;
+
+ if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
+ return false;
+
+ if (fib6_check_expired(f6i))
+ return false;
+
+ return true;
}
-static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
+static struct dst_entry *rt6_check(struct rt6_info *rt,
+ struct fib6_info *from,
+ u32 cookie)
{
u32 rt_cookie = 0;
- if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
+ if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
+ rt_cookie != cookie)
return NULL;
if (rt6_check_expired(rt))
@@ -2056,11 +2161,13 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
return &rt->dst;
}
-static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
+static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
+ struct fib6_info *from,
+ u32 cookie)
{
if (!__rt6_check_expired(rt) &&
rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
- rt6_check(rt->from, cookie))
+ fib6_check(from, cookie))
return &rt->dst;
else
return NULL;
@@ -2068,22 +2175,30 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
+ struct dst_entry *dst_ret;
+ struct fib6_info *from;
struct rt6_info *rt;
- rt = (struct rt6_info *) dst;
+ rt = container_of(dst, struct rt6_info, dst);
+
+ rcu_read_lock();
/* All IPV6 dsts are created with ->obsolete set to the value
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- rt6_dst_from_metrics_check(rt);
+ from = rcu_dereference(rt->from);
- if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
- return rt6_dst_from_check(rt, cookie);
+ if (from && (rt->rt6i_flags & RTF_PCPU ||
+ unlikely(!list_empty(&rt->rt6i_uncached))))
+ dst_ret = rt6_dst_from_check(rt, from, cookie);
else
- return rt6_check(rt, cookie);
+ dst_ret = rt6_check(rt, from, cookie);
+
+ rcu_read_unlock();
+
+ return dst_ret;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -2092,10 +2207,12 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
if (rt6_check_expired(rt)) {
- ip6_del_rt(rt);
+ rt6_remove_exception_rt(rt);
dst = NULL;
}
+ rcu_read_unlock();
} else {
dst_release(dst);
dst = NULL;
@@ -2112,35 +2229,60 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
+ rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
if (dst_hold_safe(&rt->dst))
- ip6_del_rt(rt);
+ rt6_remove_exception_rt(rt);
} else {
+ struct fib6_info *from;
struct fib6_node *fn;
- rcu_read_lock();
- fn = rcu_dereference(rt->rt6i_node);
- if (fn && (rt->rt6i_flags & RTF_DEFAULT))
- fn->fn_sernum = -1;
- rcu_read_unlock();
+ from = rcu_dereference(rt->from);
+ if (from) {
+ fn = rcu_dereference(from->fib6_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+ fn->fn_sernum = -1;
+ }
}
+ rcu_read_unlock();
}
}
+static void rt6_update_expires(struct rt6_info *rt0, int timeout)
+{
+ if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
+ struct fib6_info *from;
+
+ rcu_read_lock();
+ from = rcu_dereference(rt0->from);
+ if (from)
+ rt0->dst.expires = from->expires;
+ rcu_read_unlock();
+ }
+
+ dst_set_expires(&rt0->dst, timeout);
+ rt0->rt6i_flags |= RTF_EXPIRES;
+}
+
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
struct net *net = dev_net(rt->dst.dev);
+ dst_metric_set(&rt->dst, RTAX_MTU, mtu);
rt->rt6i_flags |= RTF_MODIFIED;
- rt->rt6i_pmtu = mtu;
rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
+ bool from_set;
+
+ rcu_read_lock();
+ from_set = !!rcu_dereference(rt->from);
+ rcu_read_unlock();
+
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU ||
- rcu_access_pointer(rt->rt6i_node));
+ (rt->rt6i_flags & RTF_PCPU || from_set);
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -2176,14 +2318,18 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6->rt6i_flags & RTF_CACHE)
rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
+ struct fib6_info *from;
struct rt6_info *nrt6;
- nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
+ rcu_read_lock();
+ from = rcu_dereference(rt6->from);
+ nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
- if (rt6_insert_exception(nrt6, rt6))
+ if (rt6_insert_exception(nrt6, from))
dst_release_immediate(&nrt6->dst);
}
+ rcu_read_unlock();
}
}
@@ -2264,7 +2410,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt, *rt_cache;
+ struct rt6_info *ret = NULL, *rt_cache;
+ struct fib6_info *rt;
struct fib6_node *fn;
/* Get the "current" route for this destination and
@@ -2281,29 +2428,29 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
- if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
continue;
- if (rt6_check_expired(rt))
+ if (fib6_check_expired(rt))
continue;
- if (rt->dst.error)
+ if (rt->fib6_flags & RTF_REJECT)
break;
- if (!(rt->rt6i_flags & RTF_GATEWAY))
+ if (!(rt->fib6_flags & RTF_GATEWAY))
continue;
- if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+ if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
continue;
/* rt_cache's gateway might be different from its 'parent'
* in the case of an ip redirect.
* So we keep searching in the exception table if the gateway
* is different.
*/
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
rt_cache = rt6_find_cached_rt(rt,
&fl6->daddr,
&fl6->saddr);
if (rt_cache &&
ipv6_addr_equal(&rdfl->gateway,
&rt_cache->rt6i_gateway)) {
- rt = rt_cache;
+ ret = rt_cache;
break;
}
continue;
@@ -2312,25 +2459,28 @@ restart:
}
if (!rt)
- rt = net->ipv6.ip6_null_entry;
- else if (rt->dst.error) {
- rt = net->ipv6.ip6_null_entry;
+ rt = net->ipv6.fib6_null_entry;
+ else if (rt->fib6_flags & RTF_REJECT) {
+ ret = net->ipv6.ip6_null_entry;
goto out;
}
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
}
out:
- ip6_hold_safe(net, &rt, true);
+ if (ret)
+ dst_hold(&ret->dst);
+ else
+ ret = ip6_create_rt_rcu(rt);
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6);
- return rt;
+ trace_fib6_table_lookup(net, ret, table, fl6);
+ return ret;
};
static struct dst_entry *ip6_route_redirect(struct net *net,
@@ -2422,12 +2572,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
- const struct rt6_info *rt = (const struct rt6_info *)dst;
- unsigned int mtu = rt->rt6i_pmtu;
struct inet6_dev *idev;
-
- if (mtu)
- goto out;
+ unsigned int mtu;
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
@@ -2511,60 +2657,22 @@ out:
return entries > rt_max_size;
}
-static int ip6_convert_metrics(struct mx6_config *mxc,
- const struct fib6_config *cfg)
+static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
+ struct fib6_config *cfg)
{
- struct net *net = cfg->fc_nlinfo.nl_net;
- bool ecn_ca = false;
- struct nlattr *nla;
- int remaining;
- u32 *mp;
+ struct dst_metrics *p;
if (!cfg->fc_mx)
return 0;
- mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (unlikely(!mp))
+ p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
+ if (unlikely(!p))
return -ENOMEM;
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
- u32 val;
-
- if (!type)
- continue;
- if (unlikely(type > RTAX_MAX))
- goto err;
-
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
-
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
- if (val == TCP_CA_UNSPEC)
- goto err;
- } else {
- val = nla_get_u32(nla);
- }
- if (type == RTAX_HOPLIMIT && val > 255)
- val = 255;
- if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
- goto err;
+ refcount_set(&p->refcnt, 1);
+ rt->fib6_metrics = p;
- mp[type - 1] = val;
- __set_bit(type - 1, mxc->mx_valid);
- }
-
- if (ecn_ca) {
- __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
- mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
- }
-
- mxc->mx = mp;
- return 0;
- err:
- kfree(mp);
- return -EINVAL;
+ return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
}
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
@@ -2750,11 +2858,12 @@ out:
return err;
}
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
+ gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
struct net *net = cfg->fc_nlinfo.nl_net;
- struct rt6_info *rt = NULL;
+ struct fib6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
@@ -2773,6 +2882,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
+ if (cfg->fc_type > RTN_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid route type");
+ goto out;
+ }
+
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
@@ -2831,35 +2945,30 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL,
- (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
+ err = -ENOMEM;
+ rt = fib6_info_alloc(gfp_flags);
+ if (!rt)
+ goto out;
- if (!rt) {
- err = -ENOMEM;
+ if (cfg->fc_flags & RTF_ADDRCONF)
+ rt->dst_nocount = true;
+
+ err = ip6_convert_metrics(net, rt, cfg);
+ if (err < 0)
goto out;
- }
if (cfg->fc_flags & RTF_EXPIRES)
- rt6_set_expires(rt, jiffies +
+ fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
else
- rt6_clean_expires(rt);
+ fib6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
- rt->rt6i_protocol = cfg->fc_protocol;
+ rt->fib6_protocol = cfg->fc_protocol;
addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (addr_type & IPV6_ADDR_MULTICAST)
- rt->dst.input = ip6_mc_input;
- else if (cfg->fc_flags & RTF_LOCAL)
- rt->dst.input = ip6_input;
- else
- rt->dst.input = ip6_forward;
-
- rt->dst.output = ip6_output;
-
if (cfg->fc_encap) {
struct lwtunnel_state *lwtstate;
@@ -2868,22 +2977,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
&lwtstate, extack);
if (err)
goto out;
- rt->dst.lwtstate = lwtstate_get(lwtstate);
- lwtunnel_set_redirect(&rt->dst);
+ rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
}
- ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
- rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128)
- rt->dst.flags |= DST_HOST;
+ ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+ rt->fib6_dst.plen = cfg->fc_dst_len;
+ if (rt->fib6_dst.plen == 128)
+ rt->dst_host = true;
#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
- rt->rt6i_src.plen = cfg->fc_src_len;
+ ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
+ rt->fib6_src.plen = cfg->fc_src_len;
#endif
- rt->rt6i_metric = cfg->fc_metric;
- rt->rt6i_nh_weight = 1;
+ rt->fib6_metric = cfg->fc_metric;
+ rt->fib6_nh.nh_weight = 1;
+
+ rt->fib6_type = cfg->fc_type;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
@@ -2906,28 +3016,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
}
- rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
- switch (cfg->fc_type) {
- case RTN_BLACKHOLE:
- rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard_out;
- rt->dst.input = dst_discard;
- break;
- case RTN_PROHIBIT:
- rt->dst.error = -EACCES;
- rt->dst.output = ip6_pkt_prohibit_out;
- rt->dst.input = ip6_pkt_prohibit;
- break;
- case RTN_THROW:
- case RTN_UNREACHABLE:
- default:
- rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
- : (cfg->fc_type == RTN_UNREACHABLE)
- ? -EHOSTUNREACH : -ENETUNREACH;
- rt->dst.output = ip6_pkt_discard_out;
- rt->dst.input = ip6_pkt_discard;
- break;
- }
+ rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
goto install_route;
}
@@ -2936,7 +3025,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (err)
goto out;
- rt->rt6i_gateway = cfg->fc_gateway;
+ rt->fib6_nh.nh_gw = cfg->fc_gateway;
}
err = -ENODEV;
@@ -2961,96 +3050,82 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
err = -EINVAL;
goto out;
}
- rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
- rt->rt6i_prefsrc.plen = 128;
+ rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
+ rt->fib6_prefsrc.plen = 128;
} else
- rt->rt6i_prefsrc.plen = 0;
+ rt->fib6_prefsrc.plen = 0;
- rt->rt6i_flags = cfg->fc_flags;
+ rt->fib6_flags = cfg->fc_flags;
install_route:
- if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+ if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
!netif_carrier_ok(dev))
- rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
- rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
- rt->dst.dev = dev;
- rt->rt6i_idev = idev;
- rt->rt6i_table = table;
+ rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
+ rt->fib6_nh.nh_dev = dev;
+ rt->fib6_table = table;
cfg->fc_nlinfo.nl_net = dev_net(dev);
+ if (idev)
+ in6_dev_put(idev);
+
return rt;
out:
if (dev)
dev_put(dev);
if (idev)
in6_dev_put(idev);
- if (rt)
- dst_release_immediate(&rt->dst);
+ fib6_info_release(rt);
return ERR_PTR(err);
}
-int ip6_route_add(struct fib6_config *cfg,
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
- struct mx6_config mxc = { .mx = NULL, };
- struct rt6_info *rt;
+ struct fib6_info *rt;
int err;
- rt = ip6_route_info_create(cfg, extack);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- rt = NULL;
- goto out;
- }
+ rt = ip6_route_info_create(cfg, gfp_flags, extack);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
- err = ip6_convert_metrics(&mxc, cfg);
- if (err)
- goto out;
-
- err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
-
- kfree(mxc.mx);
-
- return err;
-out:
- if (rt)
- dst_release_immediate(&rt->dst);
+ err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
+ fib6_info_release(rt);
return err;
}
-static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
{
- int err;
+ struct net *net = info->nl_net;
struct fib6_table *table;
- struct net *net = dev_net(rt->dst.dev);
+ int err;
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt == net->ipv6.fib6_null_entry) {
err = -ENOENT;
goto out;
}
- table = rt->rt6i_table;
+ table = rt->fib6_table;
spin_lock_bh(&table->tb6_lock);
err = fib6_del(rt, info);
spin_unlock_bh(&table->tb6_lock);
out:
- ip6_rt_put(rt);
+ fib6_info_release(rt);
return err;
}
-int ip6_del_rt(struct rt6_info *rt)
+int ip6_del_rt(struct net *net, struct fib6_info *rt)
{
- struct nl_info info = {
- .nl_net = dev_net(rt->dst.dev),
- };
+ struct nl_info info = { .nl_net = net };
+
return __ip6_del_rt(rt, &info);
}
-static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
{
struct nl_info *info = &cfg->fc_nlinfo;
struct net *net = info->nl_net;
@@ -3058,20 +3133,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
struct fib6_table *table;
int err = -ENOENT;
- if (rt == net->ipv6.ip6_null_entry)
+ if (rt == net->ipv6.fib6_null_entry)
goto out_put;
- table = rt->rt6i_table;
+ table = rt->fib6_table;
spin_lock_bh(&table->tb6_lock);
- if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
- struct rt6_info *sibling, *next_sibling;
+ if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
+ struct fib6_info *sibling, *next_sibling;
/* prefer to send a single notification with all hops */
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (skb) {
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- if (rt6_fill_node(net, skb, rt,
+ if (rt6_fill_node(net, skb, rt, NULL,
NULL, NULL, 0, RTM_DELROUTE,
info->portid, seq, 0) < 0) {
kfree_skb(skb);
@@ -3081,8 +3156,8 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
}
list_for_each_entry_safe(sibling, next_sibling,
- &rt->rt6i_siblings,
- rt6i_siblings) {
+ &rt->fib6_siblings,
+ fib6_siblings) {
err = fib6_del(sibling, info);
if (err)
goto out_unlock;
@@ -3093,7 +3168,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
out_unlock:
spin_unlock_bh(&table->tb6_lock);
out_put:
- ip6_rt_put(rt);
+ fib6_info_release(rt);
if (skb) {
rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
@@ -3102,11 +3177,28 @@ out_put:
return err;
}
+static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+{
+ int rc = -ESRCH;
+
+ if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
+ goto out;
+
+ if (cfg->fc_flags & RTF_GATEWAY &&
+ !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+ goto out;
+ if (dst_hold_safe(&rt->dst))
+ rc = rt6_remove_exception_rt(rt);
+out:
+ return rc;
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rt6_info *rt, *rt_cache;
+ struct rt6_info *rt_cache;
struct fib6_table *table;
+ struct fib6_info *rt;
struct fib6_node *fn;
int err = -ESRCH;
@@ -3126,25 +3218,29 @@ static int ip6_route_del(struct fib6_config *cfg,
if (fn) {
for_each_fib6_node_rt_rcu(fn) {
if (cfg->fc_flags & RTF_CACHE) {
+ int rc;
+
rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
&cfg->fc_src);
- if (!rt_cache)
- continue;
- rt = rt_cache;
+ if (rt_cache) {
+ rc = ip6_del_cached_rt(rt_cache, cfg);
+ if (rc != -ESRCH)
+ return rc;
+ }
+ continue;
}
if (cfg->fc_ifindex &&
- (!rt->dst.dev ||
- rt->dst.dev->ifindex != cfg->fc_ifindex))
+ (!rt->fib6_nh.nh_dev ||
+ rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
continue;
if (cfg->fc_flags & RTF_GATEWAY &&
- !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+ !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
continue;
- if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
+ if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
continue;
- if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+ if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
continue;
- if (!dst_hold_safe(&rt->dst))
- break;
+ fib6_info_hold(rt);
rcu_read_unlock();
/* if gateway was specified only delete the one hop */
@@ -3166,6 +3262,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
struct ndisc_options ndopts;
struct inet6_dev *in6_dev;
struct neighbour *neigh;
+ struct fib6_info *from;
struct rd_msg *msg;
int optlen, on_link;
u8 *lladdr;
@@ -3247,7 +3344,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NEIGH_UPDATE_F_ISROUTER)),
NDISC_REDIRECT, &ndopts);
- nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
+ rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ fib6_info_hold(from);
+ rcu_read_unlock();
+
+ nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
if (!nrt)
goto out;
@@ -3255,14 +3357,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
- nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
/* No need to remove rt from the exception table if rt is
* a cached route because rt6_insert_exception() will
* takes care of it
*/
- if (rt6_insert_exception(nrt, rt)) {
+ if (rt6_insert_exception(nrt, from)) {
dst_release_immediate(&nrt->dst);
goto out;
}
@@ -3274,47 +3375,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
+ fib6_info_release(from);
neigh_release(neigh);
}
-/*
- * Misc support functions
- */
-
-static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
-{
- BUG_ON(from->from);
-
- rt->rt6i_flags &= ~RTF_EXPIRES;
- dst_hold(&from->dst);
- rt->from = from;
- dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
-}
-
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
-{
- rt->dst.input = ort->dst.input;
- rt->dst.output = ort->dst.output;
- rt->rt6i_dst = ort->rt6i_dst;
- rt->dst.error = ort->dst.error;
- rt->rt6i_idev = ort->rt6i_idev;
- if (rt->rt6i_idev)
- in6_dev_hold(rt->rt6i_idev);
- rt->dst.lastuse = jiffies;
- rt->rt6i_gateway = ort->rt6i_gateway;
- rt->rt6i_flags = ort->rt6i_flags;
- rt6_set_from(rt, ort);
- rt->rt6i_metric = ort->rt6i_metric;
-#ifdef CONFIG_IPV6_SUBTREES
- rt->rt6i_src = ort->rt6i_src;
-#endif
- rt->rt6i_prefsrc = ort->rt6i_prefsrc;
- rt->rt6i_table = ort->rt6i_table;
- rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
-}
-
#ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
const struct in6_addr *gwaddr,
struct net_device *dev)
@@ -3322,7 +3388,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
int ifindex = dev->ifindex;
struct fib6_node *fn;
- struct rt6_info *rt = NULL;
+ struct fib6_info *rt = NULL;
struct fib6_table *table;
table = fib6_get_table(net, tb_id);
@@ -3335,13 +3401,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->dst.dev->ifindex != ifindex)
+ if (rt->fib6_nh.nh_dev->ifindex != ifindex)
continue;
- if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+ if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
- if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+ if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
continue;
- ip6_hold_safe(NULL, &rt, false);
+ fib6_info_hold(rt);
break;
}
out:
@@ -3349,7 +3415,7 @@ out:
return rt;
}
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
const struct in6_addr *gwaddr,
struct net_device *dev,
@@ -3362,6 +3428,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
.fc_protocol = RTPROT_RA,
+ .fc_type = RTN_UNICAST,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
@@ -3375,36 +3442,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
if (!prefixlen)
cfg.fc_flags |= RTF_DEFAULT;
- ip6_route_add(&cfg, NULL);
+ ip6_route_add(&cfg, GFP_ATOMIC, NULL);
return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
#endif
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
+struct fib6_info *rt6_get_dflt_router(struct net *net,
+ const struct in6_addr *addr,
+ struct net_device *dev)
{
u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
- struct rt6_info *rt;
+ struct fib6_info *rt;
struct fib6_table *table;
- table = fib6_get_table(dev_net(dev), tb_id);
+ table = fib6_get_table(net, tb_id);
if (!table)
return NULL;
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- if (dev == rt->dst.dev &&
- ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
- ipv6_addr_equal(&rt->rt6i_gateway, addr))
+ if (dev == rt->fib6_nh.nh_dev &&
+ ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
+ ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
break;
}
if (rt)
- ip6_hold_safe(NULL, &rt, false);
+ fib6_info_hold(rt);
rcu_read_unlock();
return rt;
}
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
+ const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref)
{
@@ -3415,14 +3485,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
.fc_protocol = RTPROT_RA,
+ .fc_type = RTN_UNICAST,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
- .fc_nlinfo.nl_net = dev_net(dev),
+ .fc_nlinfo.nl_net = net,
};
cfg.fc_gateway = *gwaddr;
- if (!ip6_route_add(&cfg, NULL)) {
+ if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
struct fib6_table *table;
table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -3430,24 +3501,25 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
}
- return rt6_get_dflt_router(gwaddr, dev);
+ return rt6_get_dflt_router(net, gwaddr, dev);
}
-static void __rt6_purge_dflt_routers(struct fib6_table *table)
+static void __rt6_purge_dflt_routers(struct net *net,
+ struct fib6_table *table)
{
- struct rt6_info *rt;
+ struct fib6_info *rt;
restart:
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
- (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
- if (dst_hold_safe(&rt->dst)) {
- rcu_read_unlock();
- ip6_del_rt(rt);
- } else {
- rcu_read_unlock();
- }
+ struct net_device *dev = fib6_info_nh_dev(rt);
+ struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
+
+ if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+ (!idev || idev->cnf.accept_ra != 2)) {
+ fib6_info_hold(rt);
+ rcu_read_unlock();
+ ip6_del_rt(net, rt);
goto restart;
}
}
@@ -3468,7 +3540,7 @@ void rt6_purge_dflt_routers(struct net *net)
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
- __rt6_purge_dflt_routers(table);
+ __rt6_purge_dflt_routers(net, table);
}
}
@@ -3489,6 +3561,7 @@ static void rtmsg_to_fib6_config(struct net *net,
cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
cfg->fc_src_len = rtmsg->rtmsg_src_len;
cfg->fc_flags = rtmsg->rtmsg_flags;
+ cfg->fc_type = rtmsg->rtmsg_type;
cfg->fc_nlinfo.nl_net = net;
@@ -3518,7 +3591,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
- err = ip6_route_add(&cfg, NULL);
+ err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
break;
case SIOCDELRT:
err = ip6_route_del(&cfg, NULL);
@@ -3546,7 +3619,8 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
- IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+ IP6_INC_STATS(dev_net(dst->dev),
+ __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INADDRERRORS);
break;
}
@@ -3587,40 +3661,40 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
* Allocate a dst for local (unicast / anycast) address.
*/
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
- const struct in6_addr *addr,
- bool anycast)
+struct fib6_info *addrconf_f6i_alloc(struct net *net,
+ struct inet6_dev *idev,
+ const struct in6_addr *addr,
+ bool anycast, gfp_t gfp_flags)
{
u32 tb_id;
- struct net *net = dev_net(idev->dev);
struct net_device *dev = idev->dev;
- struct rt6_info *rt;
+ struct fib6_info *f6i;
- rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
- if (!rt)
+ f6i = fib6_info_alloc(gfp_flags);
+ if (!f6i)
return ERR_PTR(-ENOMEM);
- in6_dev_hold(idev);
-
- rt->dst.flags |= DST_HOST;
- rt->dst.input = ip6_input;
- rt->dst.output = ip6_output;
- rt->rt6i_idev = idev;
-
- rt->rt6i_protocol = RTPROT_KERNEL;
- rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
- if (anycast)
- rt->rt6i_flags |= RTF_ANYCAST;
- else
- rt->rt6i_flags |= RTF_LOCAL;
+ f6i->dst_nocount = true;
+ f6i->dst_host = true;
+ f6i->fib6_protocol = RTPROT_KERNEL;
+ f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
+ if (anycast) {
+ f6i->fib6_type = RTN_ANYCAST;
+ f6i->fib6_flags |= RTF_ANYCAST;
+ } else {
+ f6i->fib6_type = RTN_LOCAL;
+ f6i->fib6_flags |= RTF_LOCAL;
+ }
- rt->rt6i_gateway = *addr;
- rt->rt6i_dst.addr = *addr;
- rt->rt6i_dst.plen = 128;
+ f6i->fib6_nh.nh_gw = *addr;
+ dev_hold(dev);
+ f6i->fib6_nh.nh_dev = dev;
+ f6i->fib6_dst.addr = *addr;
+ f6i->fib6_dst.plen = 128;
tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
- rt->rt6i_table = fib6_get_table(net, tb_id);
+ f6i->fib6_table = fib6_get_table(net, tb_id);
- return rt;
+ return f6i;
}
/* remove deleted ip from prefsrc entries */
@@ -3630,18 +3704,18 @@ struct arg_dev_net_ip {
struct in6_addr *addr;
};
-static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
{
struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->dst.dev == dev || !dev) &&
- rt != net->ipv6.ip6_null_entry &&
- ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
+ rt != net->ipv6.fib6_null_entry &&
+ ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
- rt->rt6i_prefsrc.plen = 0;
+ rt->fib6_prefsrc.plen = 0;
/* need to update cache as well */
rt6_exceptions_remove_prefsrc(rt);
spin_unlock_bh(&rt6_exception_lock);
@@ -3663,12 +3737,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
/* Remove routers and update dst entries when gateway turn into host. */
-static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+ ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
return -1;
}
@@ -3694,85 +3768,85 @@ struct arg_netdev_event {
};
};
-static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
struct fib6_node *fn;
- fn = rcu_dereference_protected(rt->rt6i_node,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ fn = rcu_dereference_protected(rt->fib6_node,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
iter = rcu_dereference_protected(fn->leaf,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
- if (iter->rt6i_metric == rt->rt6i_metric &&
+ if (iter->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(iter))
return iter;
iter = rcu_dereference_protected(iter->rt6_next,
- lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
return NULL;
}
-static bool rt6_is_dead(const struct rt6_info *rt)
+static bool rt6_is_dead(const struct fib6_info *rt)
{
- if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
- (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
- rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+ if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
+ (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
+ fib6_ignore_linkdown(rt)))
return true;
return false;
}
-static int rt6_multipath_total_weight(const struct rt6_info *rt)
+static int rt6_multipath_total_weight(const struct fib6_info *rt)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
int total = 0;
if (!rt6_is_dead(rt))
- total += rt->rt6i_nh_weight;
+ total += rt->fib6_nh.nh_weight;
- list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (!rt6_is_dead(iter))
- total += iter->rt6i_nh_weight;
+ total += iter->fib6_nh.nh_weight;
}
return total;
}
-static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
{
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- *weight += rt->rt6i_nh_weight;
+ *weight += rt->fib6_nh.nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
- atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+ atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
}
-static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
int weight = 0;
rt6_upper_bound_set(rt, &weight, total);
- list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
rt6_upper_bound_set(iter, &weight, total);
}
-void rt6_multipath_rebalance(struct rt6_info *rt)
+void rt6_multipath_rebalance(struct fib6_info *rt)
{
- struct rt6_info *first;
+ struct fib6_info *first;
int total;
/* In case the entire multipath route was marked for flushing,
* then there is no need to rebalance upon the removal of every
* sibling route.
*/
- if (!rt->rt6i_nsiblings || rt->should_flush)
+ if (!rt->fib6_nsiblings || rt->should_flush)
return;
/* During lookup routes are evaluated in order, so we need to
@@ -3787,14 +3861,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt)
rt6_multipath_upper_bound_set(first, total);
}
-static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+static int fib6_ifup(struct fib6_info *rt, void *p_arg)
{
const struct arg_netdev_event *arg = p_arg;
- const struct net *net = dev_net(arg->dev);
+ struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
- rt->rt6i_nh_flags &= ~arg->nh_flags;
- fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+ if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
+ rt->fib6_nh.nh_flags &= ~arg->nh_flags;
+ fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt);
}
@@ -3816,95 +3890,96 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
-static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
const struct net_device *dev)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
- if (rt->dst.dev == dev)
+ if (rt->fib6_nh.nh_dev == dev)
return true;
- list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
- if (iter->dst.dev == dev)
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
+ if (iter->fib6_nh.nh_dev == dev)
return true;
return false;
}
-static void rt6_multipath_flush(struct rt6_info *rt)
+static void rt6_multipath_flush(struct fib6_info *rt)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
rt->should_flush = 1;
- list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
iter->should_flush = 1;
}
-static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
const struct net_device *down_dev)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
unsigned int dead = 0;
- if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh.nh_dev == down_dev ||
+ rt->fib6_nh.nh_flags & RTNH_F_DEAD)
dead++;
- list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
- if (iter->dst.dev == down_dev ||
- iter->rt6i_nh_flags & RTNH_F_DEAD)
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
+ if (iter->fib6_nh.nh_dev == down_dev ||
+ iter->fib6_nh.nh_flags & RTNH_F_DEAD)
dead++;
return dead;
}
-static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
const struct net_device *dev,
unsigned int nh_flags)
{
- struct rt6_info *iter;
+ struct fib6_info *iter;
- if (rt->dst.dev == dev)
- rt->rt6i_nh_flags |= nh_flags;
- list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
- if (iter->dst.dev == dev)
- iter->rt6i_nh_flags |= nh_flags;
+ if (rt->fib6_nh.nh_dev == dev)
+ rt->fib6_nh.nh_flags |= nh_flags;
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
+ if (iter->fib6_nh.nh_dev == dev)
+ iter->fib6_nh.nh_flags |= nh_flags;
}
/* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
+static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
{
const struct arg_netdev_event *arg = p_arg;
const struct net_device *dev = arg->dev;
- const struct net *net = dev_net(dev);
+ struct net *net = dev_net(dev);
- if (rt == net->ipv6.ip6_null_entry)
+ if (rt == net->ipv6.fib6_null_entry)
return 0;
switch (arg->event) {
case NETDEV_UNREGISTER:
- return rt->dst.dev == dev ? -1 : 0;
+ return rt->fib6_nh.nh_dev == dev ? -1 : 0;
case NETDEV_DOWN:
if (rt->should_flush)
return -1;
- if (!rt->rt6i_nsiblings)
- return rt->dst.dev == dev ? -1 : 0;
+ if (!rt->fib6_nsiblings)
+ return rt->fib6_nh.nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count;
count = rt6_multipath_dead_count(rt, dev);
- if (rt->rt6i_nsiblings + 1 == count) {
+ if (rt->fib6_nsiblings + 1 == count) {
rt6_multipath_flush(rt);
return -1;
}
rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
RTNH_F_LINKDOWN);
- fib6_update_sernum(rt);
+ fib6_update_sernum(net, rt);
rt6_multipath_rebalance(rt);
}
return -2;
case NETDEV_CHANGE:
- if (rt->dst.dev != dev ||
- rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+ if (rt->fib6_nh.nh_dev != dev ||
+ rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
- rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt);
break;
}
@@ -3936,7 +4011,7 @@ struct rt6_mtu_change_arg {
unsigned int mtu;
};
-static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
@@ -3956,12 +4031,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
- if (rt->dst.dev == arg->dev &&
- !dst_metric_locked(&rt->dst, RTAX_MTU)) {
+ if (rt->fib6_nh.nh_dev == arg->dev &&
+ !fib6_metric_locked(rt, RTAX_MTU)) {
+ u32 mtu = rt->fib6_pmtu;
+
+ if (mtu >= arg->mtu ||
+ (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+ fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+
spin_lock_bh(&rt6_exception_lock);
- if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
- rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
- dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
spin_unlock_bh(&rt6_exception_lock);
}
@@ -4122,9 +4200,8 @@ errout:
}
struct rt6_nh {
- struct rt6_info *rt6_info;
+ struct fib6_info *fib6_info;
struct fib6_config r_cfg;
- struct mx6_config mxc;
struct list_head next;
};
@@ -4139,23 +4216,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
}
}
-static int ip6_route_info_append(struct list_head *rt6_nh_list,
- struct rt6_info *rt, struct fib6_config *r_cfg)
+static int ip6_route_info_append(struct net *net,
+ struct list_head *rt6_nh_list,
+ struct fib6_info *rt,
+ struct fib6_config *r_cfg)
{
struct rt6_nh *nh;
int err = -EEXIST;
list_for_each_entry(nh, rt6_nh_list, next) {
- /* check if rt6_info already exists */
- if (rt6_duplicate_nexthop(nh->rt6_info, rt))
+ /* check if fib6_info already exists */
+ if (rt6_duplicate_nexthop(nh->fib6_info, rt))
return err;
}
nh = kzalloc(sizeof(*nh), GFP_KERNEL);
if (!nh)
return -ENOMEM;
- nh->rt6_info = rt;
- err = ip6_convert_metrics(&nh->mxc, r_cfg);
+ nh->fib6_info = rt;
+ err = ip6_convert_metrics(net, rt, r_cfg);
if (err) {
kfree(nh);
return err;
@@ -4166,8 +4245,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,
return 0;
}
-static void ip6_route_mpath_notify(struct rt6_info *rt,
- struct rt6_info *rt_last,
+static void ip6_route_mpath_notify(struct fib6_info *rt,
+ struct fib6_info *rt_last,
struct nl_info *info,
__u16 nlflags)
{
@@ -4177,10 +4256,10 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
* nexthop. Since sibling routes are always added at the end of
* the list, find the first sibling of the last route appended
*/
- if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
- rt = list_first_entry(&rt_last->rt6i_siblings,
- struct rt6_info,
- rt6i_siblings);
+ if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
+ rt = list_first_entry(&rt_last->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
if (rt)
@@ -4190,11 +4269,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rt6_info *rt_notif = NULL, *rt_last = NULL;
+ struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
- struct rt6_info *rt;
+ struct fib6_info *rt;
struct rt6_nh *err_nh;
struct rt6_nh *nh, *nh_safe;
__u16 nlflags;
@@ -4214,7 +4293,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rtnh = (struct rtnexthop *)cfg->fc_mp;
/* Parse a Multipath Entry and build a list (rt6_nh_list) of
- * rt6_info structs per nexthop
+ * fib6_info structs per nexthop
*/
while (rtnh_ok(rtnh, remaining)) {
memcpy(&r_cfg, cfg, sizeof(*cfg));
@@ -4237,18 +4316,19 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
}
r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
- rt = ip6_route_info_create(&r_cfg, extack);
+ rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
goto cleanup;
}
- rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+ rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
- err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
+ err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
+ rt, &r_cfg);
if (err) {
- dst_release_immediate(&rt->dst);
+ fib6_info_release(rt);
goto cleanup;
}
@@ -4263,14 +4343,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
- rt_last = nh->rt6_info;
- err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
+ rt_last = nh->fib6_info;
+ err = __ip6_ins_rt(nh->fib6_info, info, extack);
+ fib6_info_release(nh->fib6_info);
+
/* save reference to first route for notification */
if (!rt_notif && !err)
- rt_notif = nh->rt6_info;
+ rt_notif = nh->fib6_info;
- /* nh->rt6_info is used or freed at this point, reset to NULL*/
- nh->rt6_info = NULL;
+ /* nh->fib6_info is used or freed at this point, reset to NULL*/
+ nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
ip6_print_replace_route_err(&rt6_nh_list);
@@ -4311,9 +4393,8 @@ add_errout:
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->rt6_info)
- dst_release_immediate(&nh->rt6_info->dst);
- kfree(nh->mxc.mx);
+ if (nh->fib6_info)
+ fib6_info_release(nh->fib6_info);
list_del(&nh->next);
kfree(nh);
}
@@ -4390,20 +4471,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (cfg.fc_mp)
return ip6_route_multipath_add(&cfg, extack);
else
- return ip6_route_add(&cfg, extack);
+ return ip6_route_add(&cfg, GFP_KERNEL, extack);
}
-static size_t rt6_nlmsg_size(struct rt6_info *rt)
+static size_t rt6_nlmsg_size(struct fib6_info *rt)
{
int nexthop_len = 0;
- if (rt->rt6i_nsiblings) {
+ if (rt->fib6_nsiblings) {
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->dst.lwtstate);
+ + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
- nexthop_len *= rt->rt6i_nsiblings;
+ nexthop_len *= rt->fib6_nsiblings;
}
return NLMSG_ALIGN(sizeof(struct rtmsg))
@@ -4419,38 +4500,41 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->dst.lwtstate)
+ + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
+ nexthop_len;
}
-static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
unsigned int *flags, bool skip_oif)
{
- if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
*flags |= RTNH_F_DEAD;
- if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
+ if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
*flags |= RTNH_F_LINKDOWN;
- if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
+
+ rcu_read_lock();
+ if (fib6_ignore_linkdown(rt))
*flags |= RTNH_F_DEAD;
+ rcu_read_unlock();
}
- if (rt->rt6i_flags & RTF_GATEWAY) {
- if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
+ if (rt->fib6_flags & RTF_GATEWAY) {
+ if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
goto nla_put_failure;
}
- *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
- if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+ *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
+ if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
/* not needed for multipath encoding b/c it has a rtnexthop struct */
- if (!skip_oif && rt->dst.dev &&
- nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
+ if (!skip_oif && rt->fib6_nh.nh_dev &&
+ nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
goto nla_put_failure;
- if (rt->dst.lwtstate &&
- lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
+ if (rt->fib6_nh.nh_lwtstate &&
+ lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
goto nla_put_failure;
return 0;
@@ -4460,8 +4544,9 @@ nla_put_failure:
}
/* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
+static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
{
+ const struct net_device *dev = rt->fib6_nh.nh_dev;
struct rtnexthop *rtnh;
unsigned int flags = 0;
@@ -4469,8 +4554,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
if (!rtnh)
goto nla_put_failure;
- rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
- rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
+ rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
+ rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
goto nla_put_failure;
@@ -4486,16 +4571,16 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int rt6_fill_node(struct net *net,
- struct sk_buff *skb, struct rt6_info *rt,
- struct in6_addr *dst, struct in6_addr *src,
+static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+ struct fib6_info *rt, struct dst_entry *dst,
+ struct in6_addr *dest, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- u32 metrics[RTAX_MAX];
struct rtmsg *rtm;
struct nlmsghdr *nlh;
- long expires;
+ long expires = 0;
+ u32 *pmetrics;
u32 table;
nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
@@ -4504,53 +4589,31 @@ static int rt6_fill_node(struct net *net,
rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET6;
- rtm->rtm_dst_len = rt->rt6i_dst.plen;
- rtm->rtm_src_len = rt->rt6i_src.plen;
+ rtm->rtm_dst_len = rt->fib6_dst.plen;
+ rtm->rtm_src_len = rt->fib6_src.plen;
rtm->rtm_tos = 0;
- if (rt->rt6i_table)
- table = rt->rt6i_table->tb6_id;
+ if (rt->fib6_table)
+ table = rt->fib6_table->tb6_id;
else
table = RT6_TABLE_UNSPEC;
rtm->rtm_table = table;
if (nla_put_u32(skb, RTA_TABLE, table))
goto nla_put_failure;
- if (rt->rt6i_flags & RTF_REJECT) {
- switch (rt->dst.error) {
- case -EINVAL:
- rtm->rtm_type = RTN_BLACKHOLE;
- break;
- case -EACCES:
- rtm->rtm_type = RTN_PROHIBIT;
- break;
- case -EAGAIN:
- rtm->rtm_type = RTN_THROW;
- break;
- default:
- rtm->rtm_type = RTN_UNREACHABLE;
- break;
- }
- }
- else if (rt->rt6i_flags & RTF_LOCAL)
- rtm->rtm_type = RTN_LOCAL;
- else if (rt->rt6i_flags & RTF_ANYCAST)
- rtm->rtm_type = RTN_ANYCAST;
- else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
- rtm->rtm_type = RTN_LOCAL;
- else
- rtm->rtm_type = RTN_UNICAST;
+
+ rtm->rtm_type = rt->fib6_type;
rtm->rtm_flags = 0;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- rtm->rtm_protocol = rt->rt6i_protocol;
+ rtm->rtm_protocol = rt->fib6_protocol;
- if (rt->rt6i_flags & RTF_CACHE)
+ if (rt->fib6_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
- if (dst) {
- if (nla_put_in6_addr(skb, RTA_DST, dst))
+ if (dest) {
+ if (nla_put_in6_addr(skb, RTA_DST, dest))
goto nla_put_failure;
rtm->rtm_dst_len = 128;
} else if (rtm->rtm_dst_len)
- if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
+ if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
if (src) {
@@ -4558,12 +4621,12 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
rtm->rtm_src_len = 128;
} else if (rtm->rtm_src_len &&
- nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
+ nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
goto nla_put_failure;
#endif
if (iif) {
#ifdef CONFIG_IPV6_MROUTE
- if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
+ if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
int err = ip6mr_get_route(net, skb, rtm, portid);
if (err == 0)
@@ -4574,34 +4637,32 @@ static int rt6_fill_node(struct net *net,
#endif
if (nla_put_u32(skb, RTA_IIF, iif))
goto nla_put_failure;
- } else if (dst) {
+ } else if (dest) {
struct in6_addr saddr_buf;
- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
+ if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
- if (rt->rt6i_prefsrc.plen) {
+ if (rt->fib6_prefsrc.plen) {
struct in6_addr saddr_buf;
- saddr_buf = rt->rt6i_prefsrc.addr;
+ saddr_buf = rt->fib6_prefsrc.addr;
if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
- memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
- if (rt->rt6i_pmtu)
- metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
- if (rtnetlink_put_metrics(skb, metrics) < 0)
+ pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
+ if (rtnetlink_put_metrics(skb, pmetrics) < 0)
goto nla_put_failure;
- if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
+ if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
goto nla_put_failure;
/* For multipath routes, walk the siblings list and add
* each as a nexthop within RTA_MULTIPATH.
*/
- if (rt->rt6i_nsiblings) {
- struct rt6_info *sibling, *next_sibling;
+ if (rt->fib6_nsiblings) {
+ struct fib6_info *sibling, *next_sibling;
struct nlattr *mp;
mp = nla_nest_start(skb, RTA_MULTIPATH);
@@ -4612,7 +4673,7 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
- &rt->rt6i_siblings, rt6i_siblings) {
+ &rt->fib6_siblings, fib6_siblings) {
if (rt6_add_nexthop(skb, sibling) < 0)
goto nla_put_failure;
}
@@ -4623,12 +4684,15 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
}
- expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
+ if (rt->fib6_flags & RTF_EXPIRES) {
+ expires = dst ? dst->expires : rt->expires;
+ expires -= jiffies;
+ }
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
+ if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
goto nla_put_failure;
- if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
+ if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
goto nla_put_failure;
@@ -4640,12 +4704,12 @@ nla_put_failure:
return -EMSGSIZE;
}
-int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct fib6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
struct net *net = arg->net;
- if (rt == net->ipv6.ip6_null_entry)
+ if (rt == net->ipv6.fib6_null_entry)
return 0;
if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
@@ -4653,16 +4717,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
/* user wants prefix routes only */
if (rtm->rtm_flags & RTM_F_PREFIX &&
- !(rt->rt6i_flags & RTF_PREFIX_RT)) {
+ !(rt->fib6_flags & RTF_PREFIX_RT)) {
/* success since this is not a prefix route */
return 1;
}
}
- return rt6_fill_node(net,
- arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
- NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
+ RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
+ arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
@@ -4671,6 +4734,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
int err, iif = 0, oif = 0;
+ struct fib6_info *from;
struct dst_entry *dst;
struct rt6_info *rt;
struct sk_buff *skb;
@@ -4759,14 +4823,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
- if (fibmatch && rt->from) {
- struct rt6_info *ort = rt->from;
-
- dst_hold(&ort->dst);
- ip6_rt_put(rt);
- rt = ort;
- }
-
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
ip6_rt_put(rt);
@@ -4775,14 +4831,21 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
skb_dst_set(skb, &rt->dst);
+
+ rcu_read_lock();
+ from = rcu_dereference(rt->from);
+
if (fibmatch)
- err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
+ err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0);
else
- err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
+ err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+ &fl6.saddr, iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+ 0);
+ rcu_read_unlock();
+
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -4793,7 +4856,7 @@ errout:
return err;
}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int nlm_flags)
{
struct sk_buff *skb;
@@ -4808,8 +4871,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
if (!skb)
goto errout;
- err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
- event, info->portid, seq, nlm_flags);
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+ event, info->portid, seq, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -4834,6 +4897,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
+ net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5030,11 +5094,17 @@ static int __net_init ip6_route_net_init(struct net *net)
if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
goto out_ip6_dst_ops;
+ net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
+ sizeof(*net->ipv6.fib6_null_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.fib6_null_entry)
+ goto out_ip6_dst_entries;
+
net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
sizeof(*net->ipv6.ip6_null_entry),
GFP_KERNEL);
if (!net->ipv6.ip6_null_entry)
- goto out_ip6_dst_entries;
+ goto out_fib6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
ip6_template_metrics, true);
@@ -5081,6 +5151,8 @@ out_ip6_prohibit_entry:
out_ip6_null_entry:
kfree(net->ipv6.ip6_null_entry);
#endif
+out_fib6_null_entry:
+ kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
@@ -5089,6 +5161,7 @@ out_ip6_dst_ops:
static void __net_exit ip6_route_net_exit(struct net *net)
{
+ kfree(net->ipv6.fib6_null_entry);
kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
kfree(net->ipv6.ip6_prohibit_entry);
@@ -5159,6 +5232,7 @@ void __init ip6_route_init_special_entries(void)
/* Registering of the loopback is done before this portion of code,
* the loopback reference in rt6_info will not be taken, do it
* manually for init_net */
+ init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 5fe139484919..eab39bd91548 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,6 +91,24 @@ static void set_tun_src(struct net *net, struct net_device *dev,
rcu_read_unlock();
}
+/* Compute flowlabel for outer IPv6 header */
+static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
+ struct ipv6hdr *inner_hdr)
+{
+ int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
+ __be32 flowlabel = 0;
+ u32 hash;
+
+ if (do_flowlabel > 0) {
+ hash = skb_get_hash(skb);
+ rol32(hash, 16);
+ flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+ } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
+ flowlabel = ip6_flowlabel(inner_hdr);
+ }
+ return flowlabel;
+}
+
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
@@ -99,6 +117,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
+ __be32 flowlabel;
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
@@ -108,6 +127,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
return err;
inner_hdr = ipv6_hdr(skb);
+ flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
skb_push(skb, tot_len);
skb_reset_network_header(skb);
@@ -121,10 +141,10 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
if (skb->protocol == htons(ETH_P_IPV6)) {
ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- ip6_flowlabel(inner_hdr));
+ flowlabel);
hdr->hop_limit = inner_hdr->hop_limit;
} else {
- ip6_flow_hdr(hdr, 0, 0);
+ ip6_flow_hdr(hdr, 0, flowlabel);
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 6fbdef630152..e15cd37024fd 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -152,6 +152,13 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "seg6_flowlabel",
+ .data = &init_net.ipv6.sysctl.seg6_flowlabel,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -217,6 +224,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
+ ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4ec76a87aeb8..a34e28ac03a7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
* Sending
*/
-static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
+static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+ struct inet_cork *cork)
{
struct sock *sk = skb->sk;
struct udphdr *uh;
@@ -1042,12 +1043,31 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
uh->len = htons(len);
uh->check = 0;
+ if (cork->gso_size) {
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+ if (hlen + cork->gso_size > cork->fragsize)
+ return -EINVAL;
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (udp_sk(sk)->no_check6_tx)
+ return -EINVAL;
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ return -EIO;
+
+ skb_shinfo(skb)->gso_size = cork->gso_size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ goto csum_partial;
+ }
+
if (is_udplite)
csum = udplite_csum(skb);
else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+csum_partial:
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
goto send;
} else
@@ -1093,7 +1113,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (!skb)
goto out;
- err = udp_v6_send_skb(skb, &fl6);
+ err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
out:
up->len = 0;
@@ -1127,6 +1147,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = -1;
ipc6.tclass = -1;
ipc6.dontfrag = -1;
+ ipc6.gso_size = up->gso_size;
sockc.tsflags = sk->sk_tsflags;
/* destination address check */
@@ -1259,7 +1280,10 @@ do_udp_sendmsg:
opt->tot_len = sizeof(*opt);
ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+ err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
+ if (err > 0)
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
+ &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1324,15 +1348,16 @@ back_from_confirm:
/* Lockless fast path for the non-corking case */
if (!corkreq) {
+ struct inet_cork_full cork;
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, &sockc);
+ msg->msg_flags, &cork, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_v6_send_skb(skb, &fl6);
+ err = udp_v6_send_skb(skb, &fl6, &cork.base);
goto out;
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 2a04dc9c781b..f7b85b1e6b3e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,6 +17,20 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static struct sk_buff *__udp6_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(gso_skb);
+ unsigned int mss = skb_shinfo(gso_skb)->gso_size;
+
+ if (!can_checksum_protocol(features, htons(ETH_P_IPV6)))
+ return ERR_PTR(-EIO);
+
+ return __udp_gso_segment(gso_skb, features, mss,
+ udp_v6_check(sizeof(struct udphdr) + mss,
+ &ip6h->saddr, &ip6h->daddr, 0));
+}
+
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -42,12 +56,15 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
goto out;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ return __udp6_gso_segment(skb, features);
+
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
* do checksum of UDP packets sent as multiple IP fragments.
*/
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 416fe67271a9..2cff209d0fc1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
* it was magically lost, so this code needs audit */
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
- xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
- xdst->u.rt6.rt6i_node = rt->rt6i_node;
xdst->route_cookie = rt6_get_cookie(rt);
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 7f1e842ef05a..e87686f7d63c 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -57,6 +57,10 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
+ /* Drop reference taken during previous invocation */
+ if (pd->session)
+ l2tp_session_dec_refcount(pd->session);
+
pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
@@ -105,11 +109,16 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
if (!pd || pd == SEQ_START_TOKEN)
return;
- /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */
+ /* Drop reference taken by last invocation of l2tp_dfs_next_session()
+ * or l2tp_dfs_next_tunnel().
+ */
+ if (pd->session) {
+ l2tp_session_dec_refcount(pd->session);
+ pd->session = NULL;
+ }
if (pd->tunnel) {
l2tp_tunnel_dec_refcount(pd->tunnel);
pd->tunnel = NULL;
- pd->session = NULL;
}
}
@@ -250,13 +259,10 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
goto out;
}
- /* Show the tunnel or session context */
- if (!pd->session) {
+ if (!pd->session)
l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
- } else {
+ else
l2tp_dfs_seq_session_show(m, pd->session);
- l2tp_session_dec_refcount(pd->session);
- }
out:
return 0;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1fd9e145076a..f951c768dcf2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1576,6 +1576,10 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
+ /* Drop reference taken during previous invocation */
+ if (pd->session)
+ l2tp_session_dec_refcount(pd->session);
+
pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
@@ -1624,11 +1628,16 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
if (!pd || pd == SEQ_START_TOKEN)
return;
- /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */
+ /* Drop reference taken by last invocation of pppol2tp_next_session()
+ * or pppol2tp_next_tunnel().
+ */
+ if (pd->session) {
+ l2tp_session_dec_refcount(pd->session);
+ pd->session = NULL;
+ }
if (pd->tunnel) {
l2tp_tunnel_dec_refcount(pd->tunnel);
pd->tunnel = NULL;
- pd->session = NULL;
}
}
@@ -1723,14 +1732,10 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
goto out;
}
- /* Show the tunnel or session context.
- */
- if (!pd->session) {
+ if (!pd->session)
pppol2tp_seq_tunnel_show(m, pd->tunnel);
- } else {
+ else
pppol2tp_seq_session_show(m, pd->session);
- l2tp_session_dec_refcount(pd->session);
- }
out:
return 0;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 8da84312cd3b..8055e3965cef 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -68,15 +68,6 @@ enum {
NCSI_MODE_MAX
};
-enum {
- NCSI_FILTER_BASE = 0,
- NCSI_FILTER_VLAN = 0,
- NCSI_FILTER_UC,
- NCSI_FILTER_MC,
- NCSI_FILTER_MIXED,
- NCSI_FILTER_MAX
-};
-
struct ncsi_channel_version {
u32 version; /* Supported BCD encoded NCSI version */
u32 alpha2; /* Supported BCD encoded NCSI version */
@@ -98,11 +89,18 @@ struct ncsi_channel_mode {
u32 data[8]; /* Data entries */
};
-struct ncsi_channel_filter {
- u32 index; /* Index of channel filters */
- u32 total; /* Total entries in the filter table */
- u64 bitmap; /* Bitmap of valid entries */
- u32 data[]; /* Data for the valid entries */
+struct ncsi_channel_mac_filter {
+ u8 n_uc;
+ u8 n_mc;
+ u8 n_mixed;
+ u64 bitmap;
+ unsigned char *addrs;
+};
+
+struct ncsi_channel_vlan_filter {
+ u8 n_vids;
+ u64 bitmap;
+ u16 *vids;
};
struct ncsi_channel_stats {
@@ -186,7 +184,9 @@ struct ncsi_channel {
struct ncsi_channel_version version;
struct ncsi_channel_cap caps[NCSI_CAP_MAX];
struct ncsi_channel_mode modes[NCSI_MODE_MAX];
- struct ncsi_channel_filter *filters[NCSI_FILTER_MAX];
+ /* Filtering Settings */
+ struct ncsi_channel_mac_filter mac_filter;
+ struct ncsi_channel_vlan_filter vlan_filter;
struct ncsi_channel_stats stats;
struct {
struct timer_list timer;
@@ -320,10 +320,6 @@ extern spinlock_t ncsi_dev_lock;
list_for_each_entry_rcu(nc, &np->channels, node)
/* Resources */
-u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
-int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
-int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
-int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
void ncsi_start_channel_monitor(struct ncsi_channel *nc);
void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index c3695ba0cf94..5561e221b71f 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -27,125 +27,6 @@
LIST_HEAD(ncsi_dev_list);
DEFINE_SPINLOCK(ncsi_dev_lock);
-static inline int ncsi_filter_size(int table)
-{
- int sizes[] = { 2, 6, 6, 6 };
-
- BUILD_BUG_ON(ARRAY_SIZE(sizes) != NCSI_FILTER_MAX);
- if (table < NCSI_FILTER_BASE || table >= NCSI_FILTER_MAX)
- return -EINVAL;
-
- return sizes[table];
-}
-
-u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
-{
- struct ncsi_channel_filter *ncf;
- int size;
-
- ncf = nc->filters[table];
- if (!ncf)
- return NULL;
-
- size = ncsi_filter_size(table);
- if (size < 0)
- return NULL;
-
- return ncf->data + size * index;
-}
-
-/* Find the first active filter in a filter table that matches the given
- * data parameter. If data is NULL, this returns the first active filter.
- */
-int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
-{
- struct ncsi_channel_filter *ncf;
- void *bitmap;
- int index, size;
- unsigned long flags;
-
- ncf = nc->filters[table];
- if (!ncf)
- return -ENXIO;
-
- size = ncsi_filter_size(table);
- if (size < 0)
- return size;
-
- spin_lock_irqsave(&nc->lock, flags);
- bitmap = (void *)&ncf->bitmap;
- index = -1;
- while ((index = find_next_bit(bitmap, ncf->total, index + 1))
- < ncf->total) {
- if (!data || !memcmp(ncf->data + size * index, data, size)) {
- spin_unlock_irqrestore(&nc->lock, flags);
- return index;
- }
- }
- spin_unlock_irqrestore(&nc->lock, flags);
-
- return -ENOENT;
-}
-
-int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data)
-{
- struct ncsi_channel_filter *ncf;
- int index, size;
- void *bitmap;
- unsigned long flags;
-
- size = ncsi_filter_size(table);
- if (size < 0)
- return size;
-
- index = ncsi_find_filter(nc, table, data);
- if (index >= 0)
- return index;
-
- ncf = nc->filters[table];
- if (!ncf)
- return -ENODEV;
-
- spin_lock_irqsave(&nc->lock, flags);
- bitmap = (void *)&ncf->bitmap;
- do {
- index = find_next_zero_bit(bitmap, ncf->total, 0);
- if (index >= ncf->total) {
- spin_unlock_irqrestore(&nc->lock, flags);
- return -ENOSPC;
- }
- } while (test_and_set_bit(index, bitmap));
-
- memcpy(ncf->data + size * index, data, size);
- spin_unlock_irqrestore(&nc->lock, flags);
-
- return index;
-}
-
-int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index)
-{
- struct ncsi_channel_filter *ncf;
- int size;
- void *bitmap;
- unsigned long flags;
-
- size = ncsi_filter_size(table);
- if (size < 0)
- return size;
-
- ncf = nc->filters[table];
- if (!ncf || index >= ncf->total)
- return -ENODEV;
-
- spin_lock_irqsave(&nc->lock, flags);
- bitmap = (void *)&ncf->bitmap;
- if (test_and_clear_bit(index, bitmap))
- memset(ncf->data + size * index, 0, size);
- spin_unlock_irqrestore(&nc->lock, flags);
-
- return 0;
-}
-
static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -339,20 +220,13 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id)
static void ncsi_remove_channel(struct ncsi_channel *nc)
{
struct ncsi_package *np = nc->package;
- struct ncsi_channel_filter *ncf;
unsigned long flags;
- int i;
- /* Release filters */
spin_lock_irqsave(&nc->lock, flags);
- for (i = 0; i < NCSI_FILTER_MAX; i++) {
- ncf = nc->filters[i];
- if (!ncf)
- continue;
- nc->filters[i] = NULL;
- kfree(ncf);
- }
+ /* Release filters */
+ kfree(nc->mac_filter.addrs);
+ kfree(nc->vlan_filter.vids);
nc->state = NCSI_CHANNEL_INACTIVE;
spin_unlock_irqrestore(&nc->lock, flags);
@@ -670,32 +544,26 @@ error:
static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
struct ncsi_cmd_arg *nca)
{
+ struct ncsi_channel_vlan_filter *ncf;
+ unsigned long flags;
+ void *bitmap;
int index;
- u32 *data;
u16 vid;
- index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL);
- if (index < 0) {
- /* Filter table empty */
- return -1;
- }
+ ncf = &nc->vlan_filter;
+ bitmap = &ncf->bitmap;
- data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
- if (!data) {
- netdev_err(ndp->ndev.dev,
- "NCSI: failed to retrieve filter %d\n", index);
- /* Set the VLAN id to 0 - this will still disable the entry in
- * the filter table, but we won't know what it was.
- */
- vid = 0;
- } else {
- vid = *(u16 *)data;
+ spin_lock_irqsave(&nc->lock, flags);
+ index = find_next_bit(bitmap, ncf->n_vids, 0);
+ if (index >= ncf->n_vids) {
+ spin_unlock_irqrestore(&nc->lock, flags);
+ return -1;
}
+ vid = ncf->vids[index];
- netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "NCSI: removed vlan tag %u at index %d\n",
- vid, index + 1);
- ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
+ clear_bit(index, bitmap);
+ ncf->vids[index] = 0;
+ spin_unlock_irqrestore(&nc->lock, flags);
nca->type = NCSI_PKT_CMD_SVF;
nca->words[1] = vid;
@@ -711,45 +579,55 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
struct ncsi_cmd_arg *nca)
{
+ struct ncsi_channel_vlan_filter *ncf;
struct vlan_vid *vlan = NULL;
- int index = 0;
+ unsigned long flags;
+ int i, index;
+ void *bitmap;
+ u16 vid;
+ if (list_empty(&ndp->vlan_vids))
+ return -1;
+
+ ncf = &nc->vlan_filter;
+ bitmap = &ncf->bitmap;
+
+ spin_lock_irqsave(&nc->lock, flags);
+
+ rcu_read_lock();
list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
- index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
- if (index < 0) {
- /* New tag to add */
- netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "NCSI: new vlan id to set: %u\n",
- vlan->vid);
+ vid = vlan->vid;
+ for (i = 0; i < ncf->n_vids; i++)
+ if (ncf->vids[i] == vid) {
+ vid = 0;
+ break;
+ }
+ if (vid)
break;
- }
- netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "vid %u already at filter pos %d\n",
- vlan->vid, index);
}
+ rcu_read_unlock();
- if (!vlan || index >= 0) {
- netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "no vlan ids left to set\n");
+ if (!vid) {
+ /* No VLAN ID is not set */
+ spin_unlock_irqrestore(&nc->lock, flags);
return -1;
}
- index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
- if (index < 0) {
+ index = find_next_zero_bit(bitmap, ncf->n_vids, 0);
+ if (index < 0 || index >= ncf->n_vids) {
netdev_err(ndp->ndev.dev,
- "Failed to add new VLAN tag, error %d\n", index);
- if (index == -ENOSPC)
- netdev_err(ndp->ndev.dev,
- "Channel %u already has all VLAN filters set\n",
- nc->id);
+ "Channel %u already has all VLAN filters set\n",
+ nc->id);
+ spin_unlock_irqrestore(&nc->lock, flags);
return -1;
}
- netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "NCSI: set vid %u in packet, index %u\n",
- vlan->vid, index + 1);
+ ncf->vids[index] = vid;
+ set_bit(index, bitmap);
+ spin_unlock_irqrestore(&nc->lock, flags);
+
nca->type = NCSI_PKT_CMD_SVF;
- nca->words[1] = vlan->vid;
+ nca->words[1] = vid;
/* HW filter index starts at 1 */
nca->bytes[6] = index + 1;
nca->bytes[7] = 0x01;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 8d7e849d4825..b09ef77bf4cd 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -58,10 +58,9 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
struct ncsi_dev_priv *ndp,
struct ncsi_channel *nc)
{
- struct nlattr *vid_nest;
- struct ncsi_channel_filter *ncf;
+ struct ncsi_channel_vlan_filter *ncf;
struct ncsi_channel_mode *m;
- u32 *data;
+ struct nlattr *vid_nest;
int i;
nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
@@ -79,18 +78,13 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
if (!vid_nest)
return -ENOMEM;
- ncf = nc->filters[NCSI_FILTER_VLAN];
+ ncf = &nc->vlan_filter;
i = -1;
- if (ncf) {
- while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
- i + 1)) < ncf->total) {
- data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
- /* Uninitialised channels will have 'zero' vlan ids */
- if (!data || !*data)
- continue;
+ while ((i = find_next_bit((void *)&ncf->bitmap, ncf->n_vids,
+ i + 1)) < ncf->n_vids) {
+ if (ncf->vids[i])
nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
- *(u16 *)data);
- }
+ ncf->vids[i]);
}
nla_nest_end(skb, vid_nest);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index efd933ff5570..ce9497966ebe 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -334,9 +334,9 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
struct ncsi_rsp_pkt *rsp;
struct ncsi_dev_priv *ndp = nr->ndp;
struct ncsi_channel *nc;
- struct ncsi_channel_filter *ncf;
- unsigned short vlan;
- int ret;
+ struct ncsi_channel_vlan_filter *ncf;
+ unsigned long flags;
+ void *bitmap;
/* Find the package and channel */
rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
@@ -346,22 +346,23 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
return -ENODEV;
cmd = (struct ncsi_cmd_svf_pkt *)skb_network_header(nr->cmd);
- ncf = nc->filters[NCSI_FILTER_VLAN];
- if (!ncf)
- return -ENOENT;
- if (cmd->index >= ncf->total)
+ ncf = &nc->vlan_filter;
+ if (cmd->index > ncf->n_vids)
return -ERANGE;
- /* Add or remove the VLAN filter */
+ /* Add or remove the VLAN filter. Remember HW indexes from 1 */
+ spin_lock_irqsave(&nc->lock, flags);
+ bitmap = &ncf->bitmap;
if (!(cmd->enable & 0x1)) {
- /* HW indexes from 1 */
- ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1);
+ if (test_and_clear_bit(cmd->index - 1, bitmap))
+ ncf->vids[cmd->index - 1] = 0;
} else {
- vlan = ntohs(cmd->vlan);
- ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
+ set_bit(cmd->index - 1, bitmap);
+ ncf->vids[cmd->index - 1] = ntohs(cmd->vlan);
}
+ spin_unlock_irqrestore(&nc->lock, flags);
- return ret;
+ return 0;
}
static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
@@ -422,8 +423,12 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
struct ncsi_rsp_pkt *rsp;
struct ncsi_dev_priv *ndp = nr->ndp;
struct ncsi_channel *nc;
- struct ncsi_channel_filter *ncf;
+ struct ncsi_channel_mac_filter *ncf;
+ unsigned long flags;
void *bitmap;
+ bool enabled;
+ int index;
+
/* Find the package and channel */
rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
@@ -436,31 +441,23 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
* isn't supported yet.
*/
cmd = (struct ncsi_cmd_sma_pkt *)skb_network_header(nr->cmd);
- switch (cmd->at_e >> 5) {
- case 0x0: /* UC address */
- ncf = nc->filters[NCSI_FILTER_UC];
- break;
- case 0x1: /* MC address */
- ncf = nc->filters[NCSI_FILTER_MC];
- break;
- default:
- return -EINVAL;
- }
+ enabled = cmd->at_e & 0x1;
+ ncf = &nc->mac_filter;
+ bitmap = &ncf->bitmap;
- /* Sanity check on the filter */
- if (!ncf)
- return -ENOENT;
- else if (cmd->index >= ncf->total)
+ if (cmd->index > ncf->n_uc + ncf->n_mc + ncf->n_mixed)
return -ERANGE;
- bitmap = &ncf->bitmap;
- if (cmd->at_e & 0x1) {
- set_bit(cmd->index, bitmap);
- memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
+ index = (cmd->index - 1) * ETH_ALEN;
+ spin_lock_irqsave(&nc->lock, flags);
+ if (enabled) {
+ set_bit(cmd->index - 1, bitmap);
+ memcpy(&ncf->addrs[index], cmd->mac, ETH_ALEN);
} else {
- clear_bit(cmd->index, bitmap);
- memset(ncf->data + 6 * cmd->index, 0, 6);
+ clear_bit(cmd->index - 1, bitmap);
+ memset(&ncf->addrs[index], 0, ETH_ALEN);
}
+ spin_unlock_irqrestore(&nc->lock, flags);
return 0;
}
@@ -631,9 +628,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
struct ncsi_rsp_gc_pkt *rsp;
struct ncsi_dev_priv *ndp = nr->ndp;
struct ncsi_channel *nc;
- struct ncsi_channel_filter *ncf;
- size_t size, entry_size;
- int cnt, i;
+ size_t size;
/* Find the channel */
rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
@@ -655,64 +650,40 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
nc->caps[NCSI_CAP_VLAN].cap = rsp->vlan_mode &
NCSI_CAP_VLAN_MASK;
- /* Build filters */
- for (i = 0; i < NCSI_FILTER_MAX; i++) {
- switch (i) {
- case NCSI_FILTER_VLAN:
- cnt = rsp->vlan_cnt;
- entry_size = 2;
- break;
- case NCSI_FILTER_MIXED:
- cnt = rsp->mixed_cnt;
- entry_size = 6;
- break;
- case NCSI_FILTER_MC:
- cnt = rsp->mc_cnt;
- entry_size = 6;
- break;
- case NCSI_FILTER_UC:
- cnt = rsp->uc_cnt;
- entry_size = 6;
- break;
- default:
- continue;
- }
-
- if (!cnt || nc->filters[i])
- continue;
-
- size = sizeof(*ncf) + cnt * entry_size;
- ncf = kzalloc(size, GFP_ATOMIC);
- if (!ncf) {
- pr_warn("%s: Cannot alloc filter table (%d)\n",
- __func__, i);
- return -ENOMEM;
- }
-
- ncf->index = i;
- ncf->total = cnt;
- if (i == NCSI_FILTER_VLAN) {
- /* Set VLAN filters active so they are cleared in
- * first configuration state
- */
- ncf->bitmap = U64_MAX;
- } else {
- ncf->bitmap = 0x0ul;
- }
- nc->filters[i] = ncf;
- }
+ size = (rsp->uc_cnt + rsp->mc_cnt + rsp->mixed_cnt) * ETH_ALEN;
+ nc->mac_filter.addrs = kzalloc(size, GFP_KERNEL);
+ if (!nc->mac_filter.addrs)
+ return -ENOMEM;
+ nc->mac_filter.n_uc = rsp->uc_cnt;
+ nc->mac_filter.n_mc = rsp->mc_cnt;
+ nc->mac_filter.n_mixed = rsp->mixed_cnt;
+
+ nc->vlan_filter.vids = kcalloc(rsp->vlan_cnt,
+ sizeof(*nc->vlan_filter.vids),
+ GFP_KERNEL);
+ if (!nc->vlan_filter.vids)
+ return -ENOMEM;
+ /* Set VLAN filters active so they are cleared in the first
+ * configuration state
+ */
+ nc->vlan_filter.bitmap = U64_MAX;
+ nc->vlan_filter.n_vids = rsp->vlan_cnt;
return 0;
}
static int ncsi_rsp_handler_gp(struct ncsi_request *nr)
{
- struct ncsi_rsp_gp_pkt *rsp;
+ struct ncsi_channel_vlan_filter *ncvf;
+ struct ncsi_channel_mac_filter *ncmf;
struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_rsp_gp_pkt *rsp;
struct ncsi_channel *nc;
- unsigned short enable, vlan;
+ unsigned short enable;
unsigned char *pdata;
- int table, i;
+ unsigned long flags;
+ void *bitmap;
+ int i;
/* Find the channel */
rsp = (struct ncsi_rsp_gp_pkt *)skb_network_header(nr->rsp);
@@ -746,36 +717,33 @@ static int ncsi_rsp_handler_gp(struct ncsi_request *nr)
/* MAC addresses filter table */
pdata = (unsigned char *)rsp + 48;
enable = rsp->mac_enable;
+ ncmf = &nc->mac_filter;
+ spin_lock_irqsave(&nc->lock, flags);
+ bitmap = &ncmf->bitmap;
for (i = 0; i < rsp->mac_cnt; i++, pdata += 6) {
- if (i >= (nc->filters[NCSI_FILTER_UC]->total +
- nc->filters[NCSI_FILTER_MC]->total))
- table = NCSI_FILTER_MIXED;
- else if (i >= nc->filters[NCSI_FILTER_UC]->total)
- table = NCSI_FILTER_MC;
- else
- table = NCSI_FILTER_UC;
-
if (!(enable & (0x1 << i)))
- continue;
-
- if (ncsi_find_filter(nc, table, pdata) >= 0)
- continue;
+ clear_bit(i, bitmap);
+ else
+ set_bit(i, bitmap);
- ncsi_add_filter(nc, table, pdata);
+ memcpy(&ncmf->addrs[i * ETH_ALEN], pdata, ETH_ALEN);
}
+ spin_unlock_irqrestore(&nc->lock, flags);
/* VLAN filter table */
enable = ntohs(rsp->vlan_enable);
+ ncvf = &nc->vlan_filter;
+ bitmap = &ncvf->bitmap;
+ spin_lock_irqsave(&nc->lock, flags);
for (i = 0; i < rsp->vlan_cnt; i++, pdata += 2) {
if (!(enable & (0x1 << i)))
- continue;
-
- vlan = ntohs(*(__be16 *)pdata);
- if (ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan) >= 0)
- continue;
+ clear_bit(i, bitmap);
+ else
+ set_bit(i, bitmap);
- ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
+ ncvf->vids[i] = ntohs(*(__be16 *)pdata);
}
+ spin_unlock_irqrestore(&nc->lock, flags);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 4527921b1c3a..ba0a0fd045c8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -266,12 +266,13 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
/* check and decrement ttl */
if (ipv6_hdr(skb)->hop_limit <= 1) {
+ struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
+
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
ICMPV6_EXC_HOPLIMIT, 0);
- __IP6_INC_STATS(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
return false;
}
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index 326fd97444f5..1944834d225c 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -21,4 +21,11 @@ config QRTR_SMD
Say Y here to support SMD based ipcrouter channels. SMD is the
most common transport for IPC Router.
+config QRTR_TUN
+ tristate "TUN device for Qualcomm IPC Router"
+ ---help---
+ Say Y here to expose a character device that allows user space to
+ implement endpoints of QRTR, for purpose of tunneling data to other
+ hosts or testing purposes.
+
endif # QRTR
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
index ab09e40f7c74..be012bfd3e52 100644
--- a/net/qrtr/Makefile
+++ b/net/qrtr/Makefile
@@ -2,3 +2,5 @@ obj-$(CONFIG_QRTR) := qrtr.o
obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
qrtr-smd-y := smd.o
+obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
+qrtr-tun-y := tun.o
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
new file mode 100644
index 000000000000..ccff1e544c21
--- /dev/null
+++ b/net/qrtr/tun.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Linaro Ltd */
+
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/uaccess.h>
+
+#include "qrtr.h"
+
+struct qrtr_tun {
+ struct qrtr_endpoint ep;
+
+ struct sk_buff_head queue;
+ wait_queue_head_t readq;
+};
+
+static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
+{
+ struct qrtr_tun *tun = container_of(ep, struct qrtr_tun, ep);
+
+ skb_queue_tail(&tun->queue, skb);
+
+ /* wake up any blocking processes, waiting for new data */
+ wake_up_interruptible(&tun->readq);
+
+ return 0;
+}
+
+static int qrtr_tun_open(struct inode *inode, struct file *filp)
+{
+ struct qrtr_tun *tun;
+
+ tun = kzalloc(sizeof(*tun), GFP_KERNEL);
+ if (!tun)
+ return -ENOMEM;
+
+ skb_queue_head_init(&tun->queue);
+ init_waitqueue_head(&tun->readq);
+
+ tun->ep.xmit = qrtr_tun_send;
+
+ filp->private_data = tun;
+
+ return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+}
+
+static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *filp = iocb->ki_filp;
+ struct qrtr_tun *tun = filp->private_data;
+ struct sk_buff *skb;
+ int count;
+
+ while (!(skb = skb_dequeue(&tun->queue))) {
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ /* Wait until we get data or the endpoint goes away */
+ if (wait_event_interruptible(tun->readq,
+ !skb_queue_empty(&tun->queue)))
+ return -ERESTARTSYS;
+ }
+
+ count = min_t(size_t, iov_iter_count(to), skb->len);
+ if (copy_to_iter(skb->data, count, to) != count)
+ count = -EFAULT;
+
+ kfree_skb(skb);
+
+ return count;
+}
+
+static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *filp = iocb->ki_filp;
+ struct qrtr_tun *tun = filp->private_data;
+ size_t len = iov_iter_count(from);
+ ssize_t ret;
+ void *kbuf;
+
+ kbuf = kzalloc(len, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ if (!copy_from_iter_full(kbuf, len, from))
+ return -EFAULT;
+
+ ret = qrtr_endpoint_post(&tun->ep, kbuf, len);
+
+ return ret < 0 ? ret : len;
+}
+
+static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait)
+{
+ struct qrtr_tun *tun = filp->private_data;
+ __poll_t mask = 0;
+
+ poll_wait(filp, &tun->readq, wait);
+
+ if (!skb_queue_empty(&tun->queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ return mask;
+}
+
+static int qrtr_tun_release(struct inode *inode, struct file *filp)
+{
+ struct qrtr_tun *tun = filp->private_data;
+ struct sk_buff *skb;
+
+ qrtr_endpoint_unregister(&tun->ep);
+
+ /* Discard all SKBs */
+ while (!skb_queue_empty(&tun->queue)) {
+ skb = skb_dequeue(&tun->queue);
+ kfree_skb(skb);
+ }
+
+ kfree(tun);
+
+ return 0;
+}
+
+static const struct file_operations qrtr_tun_ops = {
+ .owner = THIS_MODULE,
+ .open = qrtr_tun_open,
+ .poll = qrtr_tun_poll,
+ .read_iter = qrtr_tun_read_iter,
+ .write_iter = qrtr_tun_write_iter,
+ .release = qrtr_tun_release,
+};
+
+static struct miscdevice qrtr_tun_miscdev = {
+ MISC_DYNAMIC_MINOR,
+ "qrtr-tun",
+ &qrtr_tun_ops,
+};
+
+static int __init qrtr_tun_init(void)
+{
+ int ret;
+
+ ret = misc_register(&qrtr_tun_miscdev);
+ if (ret)
+ pr_err("failed to register Qualcomm IPC Router tun device\n");
+
+ return ret;
+}
+
+static void __exit qrtr_tun_exit(void)
+{
+ misc_deregister(&qrtr_tun_miscdev);
+}
+
+module_init(qrtr_tun_init);
+module_exit(qrtr_tun_exit);
+
+MODULE_DESCRIPTION("Qualcomm IPC Router TUN device");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 7e28b2ce1437..526a8e491626 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -648,6 +648,11 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_csum_get_fill_size(const struct tc_action *act)
+{
+ return nla_total_size(sizeof(struct tc_csum));
+}
+
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
.type = TCA_ACT_CSUM,
@@ -658,6 +663,7 @@ static struct tc_action_ops act_csum_ops = {
.cleanup = tcf_csum_cleanup,
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
+ .get_fill_size = tcf_csum_get_fill_size,
.size = sizeof(struct tcf_csum),
};
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d964e60c730e..eacaaf803914 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -61,16 +61,18 @@ struct fl_flow_mask_range {
struct fl_flow_mask {
struct fl_flow_key key;
struct fl_flow_mask_range range;
- struct rcu_head rcu;
+ struct rhash_head ht_node;
+ struct rhashtable ht;
+ struct rhashtable_params filter_ht_params;
+ struct flow_dissector dissector;
+ struct list_head filters;
+ struct rcu_head rcu;
+ struct list_head list;
};
struct cls_fl_head {
struct rhashtable ht;
- struct fl_flow_mask mask;
- struct flow_dissector dissector;
- bool mask_assigned;
- struct list_head filters;
- struct rhashtable_params ht_params;
+ struct list_head masks;
union {
struct work_struct work;
struct rcu_head rcu;
@@ -79,6 +81,7 @@ struct cls_fl_head {
};
struct cls_fl_filter {
+ struct fl_flow_mask *mask;
struct rhash_head ht_node;
struct fl_flow_key mkey;
struct tcf_exts exts;
@@ -94,6 +97,13 @@ struct cls_fl_filter {
struct net_device *hw_dev;
};
+static const struct rhashtable_params mask_ht_params = {
+ .key_offset = offsetof(struct fl_flow_mask, key),
+ .key_len = sizeof(struct fl_flow_key),
+ .head_offset = offsetof(struct fl_flow_mask, ht_node),
+ .automatic_shrinking = true,
+};
+
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
{
return mask->range.end - mask->range.start;
@@ -103,13 +113,19 @@ static void fl_mask_update_range(struct fl_flow_mask *mask)
{
const u8 *bytes = (const u8 *) &mask->key;
size_t size = sizeof(mask->key);
- size_t i, first = 0, last = size - 1;
+ size_t i, first = 0, last;
- for (i = 0; i < sizeof(mask->key); i++) {
+ for (i = 0; i < size; i++) {
+ if (bytes[i]) {
+ first = i;
+ break;
+ }
+ }
+ last = first;
+ for (i = size - 1; i != first; i--) {
if (bytes[i]) {
- if (!first && i)
- first = i;
last = i;
+ break;
}
}
mask->range.start = rounddown(first, sizeof(long));
@@ -140,12 +156,11 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}
-static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head,
+static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
struct fl_flow_key *mkey)
{
- return rhashtable_lookup_fast(&head->ht,
- fl_key_get_start(mkey, &head->mask),
- head->ht_params);
+ return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
+ mask->filter_ht_params);
}
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -153,28 +168,28 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
struct cls_fl_filter *f;
+ struct fl_flow_mask *mask;
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
- if (!atomic_read(&head->ht.nelems))
- return -1;
-
- fl_clear_masked_range(&skb_key, &head->mask);
+ list_for_each_entry_rcu(mask, &head->masks, list) {
+ fl_clear_masked_range(&skb_key, mask);
- skb_key.indev_ifindex = skb->skb_iif;
- /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
- * so do it rather here.
- */
- skb_key.basic.n_proto = skb->protocol;
- skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key);
- skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
+ skb_key.indev_ifindex = skb->skb_iif;
+ /* skb_flow_dissect() does not set n_proto in case an unknown
+ * protocol, so do it rather here.
+ */
+ skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
+ skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
- fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+ fl_set_masked_key(&skb_mkey, &skb_key, mask);
- f = fl_lookup(head, &skb_mkey);
- if (f && !tc_skip_sw(f->flags)) {
- *res = f->res;
- return tcf_exts_exec(skb, &f->exts, res);
+ f = fl_lookup(mask, &skb_mkey);
+ if (f && !tc_skip_sw(f->flags)) {
+ *res = f->res;
+ return tcf_exts_exec(skb, &f->exts, res);
+ }
}
return -1;
}
@@ -187,11 +202,28 @@ static int fl_init(struct tcf_proto *tp)
if (!head)
return -ENOBUFS;
- INIT_LIST_HEAD_RCU(&head->filters);
+ INIT_LIST_HEAD_RCU(&head->masks);
rcu_assign_pointer(tp->root, head);
idr_init(&head->handle_idr);
- return 0;
+ return rhashtable_init(&head->ht, &mask_ht_params);
+}
+
+static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
+ bool async)
+{
+ if (!list_empty(&mask->filters))
+ return false;
+
+ rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
+ rhashtable_destroy(&mask->ht);
+ list_del_rcu(&mask->list);
+ if (async)
+ kfree_rcu(mask, rcu);
+ else
+ kfree(mask);
+
+ return true;
}
static void __fl_destroy_filter(struct cls_fl_filter *f)
@@ -234,8 +266,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
- struct flow_dissector *dissector,
- struct fl_flow_key *mask,
struct cls_fl_filter *f,
struct netlink_ext_ack *extack)
{
@@ -247,8 +277,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
- cls_flower.dissector = dissector;
- cls_flower.mask = mask;
+ cls_flower.dissector = &f->mask->dissector;
+ cls_flower.mask = &f->mask->key;
cls_flower.key = &f->mkey;
cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
@@ -283,28 +313,31 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
&cls_flower, false);
}
-static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
+ bool async = tcf_exts_get_net(&f->exts);
+ bool last;
idr_remove(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
+ last = fl_mask_put(head, f->mask, async);
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
- if (tcf_exts_get_net(&f->exts))
+ if (async)
call_rcu(&f->rcu, fl_destroy_filter);
else
__fl_destroy_filter(f);
+
+ return last;
}
static void fl_destroy_sleepable(struct work_struct *work)
{
struct cls_fl_head *head = container_of(work, struct cls_fl_head,
work);
- if (head->mask_assigned)
- rhashtable_destroy(&head->ht);
kfree(head);
module_put(THIS_MODULE);
}
@@ -320,10 +353,15 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct fl_flow_mask *mask, *next_mask;
struct cls_fl_filter *f, *next;
- list_for_each_entry_safe(f, next, &head->filters, list)
- __fl_delete(tp, f, extack);
+ list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
+ list_for_each_entry_safe(f, next, &mask->filters, list) {
+ if (__fl_delete(tp, f, extack))
+ break;
+ }
+ }
idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
@@ -715,14 +753,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
return ret;
}
-static bool fl_mask_eq(struct fl_flow_mask *mask1,
- struct fl_flow_mask *mask2)
+static void fl_mask_copy(struct fl_flow_mask *dst,
+ struct fl_flow_mask *src)
{
- const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
- const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+ const void *psrc = fl_key_get_start(&src->key, src);
+ void *pdst = fl_key_get_start(&dst->key, src);
- return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
- !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+ memcpy(pdst, psrc, fl_mask_range(src));
+ dst->range = src->range;
}
static const struct rhashtable_params fl_ht_params = {
@@ -731,14 +769,13 @@ static const struct rhashtable_params fl_ht_params = {
.automatic_shrinking = true,
};
-static int fl_init_hashtable(struct cls_fl_head *head,
- struct fl_flow_mask *mask)
+static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
{
- head->ht_params = fl_ht_params;
- head->ht_params.key_len = fl_mask_range(mask);
- head->ht_params.key_offset += mask->range.start;
+ mask->filter_ht_params = fl_ht_params;
+ mask->filter_ht_params.key_len = fl_mask_range(mask);
+ mask->filter_ht_params.key_offset += mask->range.start;
- return rhashtable_init(&head->ht, &head->ht_params);
+ return rhashtable_init(&mask->ht, &mask->filter_ht_params);
}
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
@@ -761,8 +798,7 @@ static int fl_init_hashtable(struct cls_fl_head *head,
FL_KEY_SET(keys, cnt, id, member); \
} while(0);
-static void fl_init_dissector(struct cls_fl_head *head,
- struct fl_flow_mask *mask)
+static void fl_init_dissector(struct fl_flow_mask *mask)
{
struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
size_t cnt = 0;
@@ -802,31 +838,66 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
- skb_flow_dissector_init(&head->dissector, keys, cnt);
+ skb_flow_dissector_init(&mask->dissector, keys, cnt);
+}
+
+static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ struct fl_flow_mask *newmask;
+ int err;
+
+ newmask = kzalloc(sizeof(*newmask), GFP_KERNEL);
+ if (!newmask)
+ return ERR_PTR(-ENOMEM);
+
+ fl_mask_copy(newmask, mask);
+
+ err = fl_init_mask_hashtable(newmask);
+ if (err)
+ goto errout_free;
+
+ fl_init_dissector(newmask);
+
+ INIT_LIST_HEAD_RCU(&newmask->filters);
+
+ err = rhashtable_insert_fast(&head->ht, &newmask->ht_node,
+ mask_ht_params);
+ if (err)
+ goto errout_destroy;
+
+ list_add_tail_rcu(&newmask->list, &head->masks);
+
+ return newmask;
+
+errout_destroy:
+ rhashtable_destroy(&newmask->ht);
+errout_free:
+ kfree(newmask);
+
+ return ERR_PTR(err);
}
static int fl_check_assign_mask(struct cls_fl_head *head,
+ struct cls_fl_filter *fnew,
+ struct cls_fl_filter *fold,
struct fl_flow_mask *mask)
{
- int err;
+ struct fl_flow_mask *newmask;
- if (head->mask_assigned) {
- if (!fl_mask_eq(&head->mask, mask))
+ fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params);
+ if (!fnew->mask) {
+ if (fold)
return -EINVAL;
- else
- return 0;
- }
- /* Mask is not assigned yet. So assign it and init hashtable
- * according to that.
- */
- err = fl_init_hashtable(head, mask);
- if (err)
- return err;
- memcpy(&head->mask, mask, sizeof(head->mask));
- head->mask_assigned = true;
+ newmask = fl_create_new_mask(head, mask);
+ if (IS_ERR(newmask))
+ return PTR_ERR(newmask);
- fl_init_dissector(head, mask);
+ fnew->mask = newmask;
+ } else if (fold && fold->mask == fnew->mask) {
+ return -EINVAL;
+ }
return 0;
}
@@ -924,30 +995,26 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout_idr;
- err = fl_check_assign_mask(head, &mask);
+ err = fl_check_assign_mask(head, fnew, fold, &mask);
if (err)
goto errout_idr;
if (!tc_skip_sw(fnew->flags)) {
- if (!fold && fl_lookup(head, &fnew->mkey)) {
+ if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
err = -EEXIST;
- goto errout_idr;
+ goto errout_mask;
}
- err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
- head->ht_params);
+ err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
+ fnew->mask->filter_ht_params);
if (err)
- goto errout_idr;
+ goto errout_mask;
}
if (!tc_skip_hw(fnew->flags)) {
- err = fl_hw_replace_filter(tp,
- &head->dissector,
- &mask.key,
- fnew,
- extack);
+ err = fl_hw_replace_filter(tp, fnew, extack);
if (err)
- goto errout_idr;
+ goto errout_mask;
}
if (!tc_in_hw(fnew->flags))
@@ -955,8 +1022,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
if (!tc_skip_sw(fold->flags))
- rhashtable_remove_fast(&head->ht, &fold->ht_node,
- head->ht_params);
+ rhashtable_remove_fast(&fold->mask->ht,
+ &fold->ht_node,
+ fold->mask->filter_ht_params);
if (!tc_skip_hw(fold->flags))
fl_hw_destroy_filter(tp, fold, NULL);
}
@@ -970,12 +1038,15 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, fl_destroy_filter);
} else {
- list_add_tail_rcu(&fnew->list, &head->filters);
+ list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
}
kfree(tb);
return 0;
+errout_mask:
+ fl_mask_put(head, fnew->mask, false);
+
errout_idr:
if (fnew->handle)
idr_remove(&head->handle_idr, fnew->handle);
@@ -994,10 +1065,10 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
struct cls_fl_filter *f = arg;
if (!tc_skip_sw(f->flags))
- rhashtable_remove_fast(&head->ht, &f->ht_node,
- head->ht_params);
+ rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+ f->mask->filter_ht_params);
__fl_delete(tp, f, extack);
- *last = list_empty(&head->filters);
+ *last = list_empty(&head->masks);
return 0;
}
@@ -1005,16 +1076,19 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f;
-
- list_for_each_entry_rcu(f, &head->filters, list) {
- if (arg->count < arg->skip)
- goto skip;
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
- break;
- }
+ struct fl_flow_mask *mask;
+
+ list_for_each_entry_rcu(mask, &head->masks, list) {
+ list_for_each_entry_rcu(f, &mask->filters, list) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
skip:
- arg->count++;
+ arg->count++;
+ }
}
}
@@ -1150,7 +1224,6 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = fh;
struct nlattr *nest;
struct fl_flow_key *key, *mask;
@@ -1169,7 +1242,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
goto nla_put_failure;
key = &f->key;
- mask = &head->mask.key;
+ mask = &f->mask->key;
if (mask->indev_ifindex) {
struct net_device *dev;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 837806dd5799..039fdb862b17 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -652,33 +652,20 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
*/
peer->param_flags = asoc->param_flags;
- sctp_transport_route(peer, NULL, sp);
-
/* Initialize the pmtu of the transport. */
- if (peer->param_flags & SPP_PMTUD_DISABLE) {
- if (asoc->pathmtu)
- peer->pathmtu = asoc->pathmtu;
- else
- peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
- }
+ sctp_transport_route(peer, NULL, sp);
/* If this is the first transport addr on this association,
* initialize the association PMTU to the peer's PMTU.
* If not and the current association PMTU is higher than the new
* peer's PMTU, reset the association PMTU to the new peer's PMTU.
*/
- if (asoc->pathmtu)
- asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu);
- else
- asoc->pathmtu = peer->pathmtu;
-
- pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc,
- asoc->pathmtu);
+ sctp_assoc_set_pmtu(asoc, asoc->pathmtu ?
+ min_t(int, peer->pathmtu, asoc->pathmtu) :
+ peer->pathmtu);
peer->pmtu_pending = 0;
- asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
-
/* The asoc->peer.port might not be meaningful yet, but
* initialize the packet structure anyway.
*/
@@ -988,31 +975,6 @@ out:
return match;
}
-/* Is this the association we are looking for? */
-struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
- struct net *net,
- const union sctp_addr *laddr,
- const union sctp_addr *paddr)
-{
- struct sctp_transport *transport;
-
- if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
- (htons(asoc->peer.port) == paddr->v4.sin_port) &&
- net_eq(sock_net(asoc->base.sk), net)) {
- transport = sctp_assoc_lookup_paddr(asoc, paddr);
- if (!transport)
- goto out;
-
- if (sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
- sctp_sk(asoc->base.sk)))
- goto out;
- }
- transport = NULL;
-
-out:
- return transport;
-}
-
/* Do delayed input processing. This is scheduled by sctp_rcv(). */
static void sctp_assoc_bh_rcv(struct work_struct *work)
{
@@ -1406,6 +1368,31 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
}
}
+void sctp_assoc_update_frag_point(struct sctp_association *asoc)
+{
+ int frag = sctp_mtu_payload(sctp_sk(asoc->base.sk), asoc->pathmtu,
+ sctp_datachk_len(&asoc->stream));
+
+ if (asoc->user_frag)
+ frag = min_t(int, frag, asoc->user_frag);
+
+ frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN -
+ sctp_datachk_len(&asoc->stream));
+
+ asoc->frag_point = SCTP_TRUNC4(frag);
+}
+
+void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu)
+{
+ if (asoc->pathmtu != pmtu) {
+ asoc->pathmtu = pmtu;
+ sctp_assoc_update_frag_point(asoc);
+ }
+
+ pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
+ asoc->pathmtu, asoc->frag_point);
+}
+
/* Update the association's pmtu and frag_point by going through all the
* transports. This routine is called when a transport's PMTU has changed.
*/
@@ -1418,24 +1405,16 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
return;
/* Get the lowest pmtu of all the transports. */
- list_for_each_entry(t, &asoc->peer.transport_addr_list,
- transports) {
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
if (t->pmtu_pending && t->dst) {
- sctp_transport_update_pmtu(
- t, SCTP_TRUNC4(dst_mtu(t->dst)));
+ sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
pmtu = t->pathmtu;
}
- if (pmtu) {
- asoc->pathmtu = pmtu;
- asoc->frag_point = sctp_frag_point(asoc, pmtu);
- }
-
- pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
- asoc->pathmtu, asoc->frag_point);
+ sctp_assoc_set_pmtu(asoc, pmtu);
}
/* Should we send a SACK to update our peer? */
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index be296d633e95..79daa98208c3 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -172,8 +172,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_datamsg *msg;
- struct sctp_sock *sp;
- struct sctp_af *af;
int err;
msg = sctp_datamsg_new(GFP_KERNEL);
@@ -192,12 +190,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
- sp = sctp_sk(asoc->base.sk);
- af = sp->pf->af;
- max_data = asoc->pathmtu - af->net_header_len -
- sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream) -
- af->ip_options_len(asoc->base.sk);
- max_data = SCTP_TRUNC4(max_data);
+ max_data = asoc->frag_point;
/* If the the peer requested that we authenticate DATA chunks
* we need to account for bundling of the AUTH chunks along with
@@ -222,9 +215,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
}
}
- /* Check what's our max considering the above */
- max_data = min_t(size_t, max_data, asoc->frag_point);
-
/* Set first_len and then account for possible bundles on first frag */
first_len = max_data;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 690d8557bb7b..e672dee302c7 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -90,8 +90,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
{
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
+ struct sctp_sock *sp = NULL;
struct sock *sk;
- size_t overhead = sizeof(struct ipv6hdr) + sizeof(struct sctphdr);
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
@@ -102,28 +102,20 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
/* set packet max_size with pathmtu, then calculate overhead */
packet->max_size = tp->pathmtu;
+
if (asoc) {
- struct sctp_sock *sp = sctp_sk(asoc->base.sk);
- struct sctp_af *af = sp->pf->af;
-
- overhead = af->net_header_len +
- af->ip_options_len(asoc->base.sk);
- overhead += sizeof(struct sctphdr);
- packet->overhead = overhead;
- packet->size = overhead;
- } else {
- packet->overhead = overhead;
- packet->size = overhead;
- return;
+ sk = asoc->base.sk;
+ sp = sctp_sk(sk);
}
+ packet->overhead = sctp_mtu_payload(sp, 0, 0);
+ packet->size = packet->overhead;
+
+ if (!asoc)
+ return;
/* update dst or transport pathmtu if in need */
- sk = asoc->base.sk;
if (!sctp_transport_dst_check(tp)) {
- sctp_transport_route(tp, NULL, sctp_sk(sk));
- if (asoc->param_flags & SPP_PMTUD_ENABLE)
- sctp_assoc_sync_pmtu(asoc);
- } else if (!sctp_transport_pmtu_check(tp)) {
+ sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index f211b3db6a35..dee7cbd54831 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1457,7 +1457,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* the outstanding bytes for this chunk, so only
* count bytes associated with a transport.
*/
- if (transport) {
+ if (transport && !tchunk->tsn_gap_acked) {
/* If this chunk is being used for RTT
* measurement, calculate the RTT and update
* the RTO using this value.
@@ -1469,14 +1469,34 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* first instance of the packet or a later
* instance).
*/
- if (!tchunk->tsn_gap_acked &&
- !sctp_chunk_retransmitted(tchunk) &&
+ if (!sctp_chunk_retransmitted(tchunk) &&
tchunk->rtt_in_progress) {
tchunk->rtt_in_progress = 0;
rtt = jiffies - tchunk->sent_at;
sctp_transport_update_rto(transport,
rtt);
}
+
+ if (TSN_lte(tsn, sack_ctsn)) {
+ /*
+ * SFR-CACC algorithm:
+ * 2) If the SACK contains gap acks
+ * and the flag CHANGEOVER_ACTIVE is
+ * set the receiver of the SACK MUST
+ * take the following action:
+ *
+ * B) For each TSN t being acked that
+ * has not been acked in any SACK so
+ * far, set cacc_saw_newack to 1 for
+ * the destination that the TSN was
+ * sent to.
+ */
+ if (sack->num_gap_ack_blocks &&
+ q->asoc->peer.primary_path->cacc.
+ changeover_active)
+ transport->cacc.cacc_saw_newack
+ = 1;
+ }
}
/* If the chunk hasn't been marked as ACKED,
@@ -1508,28 +1528,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
restart_timer = 1;
forward_progress = true;
- if (!tchunk->tsn_gap_acked) {
- /*
- * SFR-CACC algorithm:
- * 2) If the SACK contains gap acks
- * and the flag CHANGEOVER_ACTIVE is
- * set the receiver of the SACK MUST
- * take the following action:
- *
- * B) For each TSN t being acked that
- * has not been acked in any SACK so
- * far, set cacc_saw_newack to 1 for
- * the destination that the TSN was
- * sent to.
- */
- if (transport &&
- sack->num_gap_ack_blocks &&
- q->asoc->peer.primary_path->cacc.
- changeover_active)
- transport->cacc.cacc_saw_newack
- = 1;
- }
-
list_add_tail(&tchunk->transmitted_list,
&q->sacked);
} else {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5a4fb1dc8400..4d7b3ccea078 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -81,8 +81,6 @@ static int sctp_process_param(struct sctp_association *asoc,
gfp_t gfp);
static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
const void *data);
-static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
- const void *data);
/* Control chunk destructor */
static void sctp_control_release_owner(struct sk_buff *skb)
@@ -154,12 +152,11 @@ static const struct sctp_paramhdr prsctp_param = {
cpu_to_be16(sizeof(struct sctp_paramhdr)),
};
-/* A helper to initialize an op error inside a
- * provided chunk, as most cause codes will be embedded inside an
- * abort chunk.
+/* A helper to initialize an op error inside a provided chunk, as most
+ * cause codes will be embedded inside an abort chunk.
*/
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
- size_t paylen)
+int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
+ size_t paylen)
{
struct sctp_errhdr err;
__u16 len;
@@ -167,33 +164,16 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
/* Cause code constants are now defined in network order. */
err.cause = cause_code;
len = sizeof(err) + paylen;
- err.length = htons(len);
- chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
-}
-
-/* A helper to initialize an op error inside a
- * provided chunk, as most cause codes will be embedded inside an
- * abort chunk. Differs from sctp_init_cause in that it won't oops
- * if there isn't enough space in the op error chunk
- */
-static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
- size_t paylen)
-{
- struct sctp_errhdr err;
- __u16 len;
-
- /* Cause code constants are now defined in network order. */
- err.cause = cause_code;
- len = sizeof(err) + paylen;
- err.length = htons(len);
+ err.length = htons(len);
if (skb_tailroom(chunk->skb) < len)
return -ENOSPC;
- chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+ chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
return 0;
}
+
/* 3.3.2 Initiation (INIT) (1)
*
* This chunk is used to initiate a SCTP association between two
@@ -779,10 +759,9 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
* association. This reports on which TSN's we've seen to date,
* including duplicates and gaps.
*/
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc)
{
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
- struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
__u16 num_gabs, num_dup_tsns;
struct sctp_transport *trans;
@@ -857,7 +836,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
/* Add the duplicate TSN information. */
if (num_dup_tsns) {
- aptr->stats.idupchunks += num_dup_tsns;
+ asoc->stats.idupchunks += num_dup_tsns;
sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
sctp_tsnmap_get_dups(map));
}
@@ -869,11 +848,11 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
* association so no transport will match after a wrap event like this,
* Until the next sack
*/
- if (++aptr->peer.sack_generation == 0) {
+ if (++asoc->peer.sack_generation == 0) {
list_for_each_entry(trans, &asoc->peer.transport_addr_list,
transports)
trans->sack_generation = 0;
- aptr->peer.sack_generation = 1;
+ asoc->peer.sack_generation = 1;
}
nodata:
return retval;
@@ -1258,20 +1237,26 @@ nodata:
return retval;
}
-/* Create an Operation Error chunk of a fixed size,
- * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
- * This is a helper function to allocate an error chunk for
- * for those invalid parameter codes in which we may not want
- * to report all the errors, if the incoming chunk is large
+/* Create an Operation Error chunk of a fixed size, specifically,
+ * min(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) - overheads.
+ * This is a helper function to allocate an error chunk for for those
+ * invalid parameter codes in which we may not want to report all the
+ * errors, if the incoming chunk is large. If it can't fit in a single
+ * packet, we ignore it.
*/
-static inline struct sctp_chunk *sctp_make_op_error_fixed(
+static inline struct sctp_chunk *sctp_make_op_error_limited(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
- size_t size = asoc ? asoc->pathmtu : 0;
+ size_t size = SCTP_DEFAULT_MAXSEGMENT;
+ struct sctp_sock *sp = NULL;
- if (!size)
- size = SCTP_DEFAULT_MAXSEGMENT;
+ if (asoc) {
+ size = min_t(size_t, size, asoc->pathmtu);
+ sp = sctp_sk(asoc->base.sk);
+ }
+
+ size = sctp_mtu_payload(sp, size, sizeof(struct sctp_errhdr));
return sctp_make_op_error_space(asoc, chunk, size);
}
@@ -1523,18 +1508,6 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
return target;
}
-/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
- * space in the chunk
- */
-static void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
- int len, const void *data)
-{
- if (skb_tailroom(chunk->skb) >= len)
- return sctp_addto_chunk(chunk, len, data);
- else
- return NULL;
-}
-
/* Append bytes from user space to the end of a chunk. Will panic if
* chunk is not big enough.
* Returns a kernel err value.
@@ -1829,6 +1802,9 @@ no_hmac:
kt = ktime_get_real();
if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
+ suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
+ __be32 n = htonl(usecs);
+
/*
* Section 3.3.10.3 Stale Cookie Error (3)
*
@@ -1837,17 +1813,12 @@ no_hmac:
* Stale Cookie Error: Indicates the receipt of a valid State
* Cookie that has expired.
*/
- len = ntohs(chunk->chunk_hdr->length);
- *errp = sctp_make_op_error_space(asoc, chunk, len);
- if (*errp) {
- suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
- __be32 n = htonl(usecs);
-
- sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
- sizeof(n));
- sctp_addto_chunk(*errp, sizeof(n), &n);
+ *errp = sctp_make_op_error(asoc, chunk,
+ SCTP_ERROR_STALE_COOKIE, &n,
+ sizeof(n), 0);
+ if (*errp)
*error = -SCTP_IERROR_STALE_COOKIE;
- } else
+ else
*error = -SCTP_IERROR_NOMEM;
goto fail;
@@ -1998,12 +1969,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
if (*errp)
sctp_chunk_free(*errp);
- *errp = sctp_make_op_error_space(asoc, chunk, len);
-
- if (*errp) {
- sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len);
- sctp_addto_chunk(*errp, len, param.v);
- }
+ *errp = sctp_make_op_error(asoc, chunk, SCTP_ERROR_DNS_FAILED,
+ param.v, len, 0);
/* Stop processing this chunk. */
return 0;
@@ -2128,23 +2095,23 @@ static enum sctp_ierror sctp_process_unk_param(
/* Make an ERROR chunk, preparing enough room for
* returning multiple unknown parameters.
*/
- if (NULL == *errp)
- *errp = sctp_make_op_error_fixed(asoc, chunk);
-
- if (*errp) {
- if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
- SCTP_PAD4(ntohs(param.p->length))))
- sctp_addto_chunk_fixed(*errp,
- SCTP_PAD4(ntohs(param.p->length)),
- param.v);
- } else {
- /* If there is no memory for generating the ERROR
- * report as specified, an ABORT will be triggered
- * to the peer and the association won't be
- * established.
- */
- retval = SCTP_IERROR_NOMEM;
+ if (!*errp) {
+ *errp = sctp_make_op_error_limited(asoc, chunk);
+ if (!*errp) {
+ /* If there is no memory for generating the
+ * ERROR report as specified, an ABORT will be
+ * triggered to the peer and the association
+ * won't be established.
+ */
+ retval = SCTP_IERROR_NOMEM;
+ break;
+ }
}
+
+ if (!sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+ ntohs(param.p->length)))
+ sctp_addto_chunk(*errp, ntohs(param.p->length),
+ param.v);
break;
default:
break;
@@ -2220,10 +2187,10 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
* MUST be aborted. The ABORT chunk SHOULD contain the error
* cause 'Protocol Violation'.
*/
- if (SCTP_AUTH_RANDOM_LENGTH !=
- ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
+ if (SCTP_AUTH_RANDOM_LENGTH != ntohs(param.p->length) -
+ sizeof(struct sctp_paramhdr)) {
sctp_process_inv_paramlength(asoc, param.p,
- chunk, err_chunk);
+ chunk, err_chunk);
retval = SCTP_IERROR_ABORT;
}
break;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 80835ac26d2c..1b4593b842b0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -644,16 +644,15 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
list_for_each_entry(trans,
&asoc->peer.transport_addr_list, transports) {
- /* Clear the source and route cache */
- sctp_transport_dst_release(trans);
trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
2*asoc->pathmtu, 4380));
trans->ssthresh = asoc->peer.i.a_rwnd;
trans->rto = asoc->rto_initial;
sctp_max_rto(asoc, trans);
trans->rtt = trans->srtt = trans->rttvar = 0;
+ /* Clear the source and route cache */
sctp_transport_route(trans, NULL,
- sctp_sk(asoc->base.sk));
+ sctp_sk(asoc->base.sk));
}
}
retval = sctp_send_asconf(asoc, chunk);
@@ -896,7 +895,6 @@ skip_mkasconf:
*/
list_for_each_entry(transport, &asoc->peer.transport_addr_list,
transports) {
- sctp_transport_dst_release(transport);
sctp_transport_route(transport, NULL,
sctp_sk(asoc->base.sk));
}
@@ -1895,6 +1893,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
struct sctp_sndrcvinfo *sinfo)
{
struct sock *sk = asoc->base.sk;
+ struct sctp_sock *sp = sctp_sk(sk);
struct net *net = sock_net(sk);
struct sctp_datamsg *datamsg;
bool wait_connect = false;
@@ -1913,13 +1912,16 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
goto err;
}
- if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+ if (sp->disable_fragments && msg_len > asoc->frag_point) {
err = -EMSGSIZE;
goto err;
}
- if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
+ if (asoc->pmtu_pending) {
+ if (sp->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
+ asoc->pmtu_pending = 0;
+ }
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
@@ -1936,7 +1938,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
if (err)
goto err;
- if (sctp_sk(sk)->strm_interleave) {
+ if (sp->strm_interleave) {
timeo = sock_sndtimeo(sk, 0);
err = sctp_wait_for_connect(asoc, &timeo);
if (err)
@@ -2539,7 +2541,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
trans->pathmtu = params->spp_pathmtu;
sctp_assoc_sync_pmtu(asoc);
} else if (asoc) {
- asoc->pathmtu = params->spp_pathmtu;
+ sctp_assoc_set_pmtu(asoc, params->spp_pathmtu);
} else {
sp->pathmtu = params->spp_pathmtu;
}
@@ -3209,7 +3211,6 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_af *af = sp->pf->af;
struct sctp_assoc_value params;
struct sctp_association *asoc;
int val;
@@ -3231,30 +3232,24 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
return -EINVAL;
}
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+
if (val) {
int min_len, max_len;
+ __u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
+ sizeof(struct sctp_data_chunk);
- min_len = SCTP_DEFAULT_MINSEGMENT - af->net_header_len;
- min_len -= af->ip_options_len(sk);
- min_len -= sizeof(struct sctphdr) +
- sizeof(struct sctp_data_chunk);
-
- max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+ min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
+ datasize);
+ max_len = SCTP_MAX_CHUNK_LEN - datasize;
if (val < min_len || val > max_len)
return -EINVAL;
}
- asoc = sctp_id2assoc(sk, params.assoc_id);
if (asoc) {
- if (val == 0) {
- val = asoc->pathmtu - af->net_header_len;
- val -= af->ip_options_len(sk);
- val -= sizeof(struct sctphdr) +
- sctp_datachk_len(&asoc->stream);
- }
asoc->user_frag = val;
- asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
+ sctp_assoc_update_frag_point(asoc);
} else {
if (params.assoc_id && sctp_style(sk, UDP))
return -EINVAL;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 47f82bd794d9..4a95e260b674 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -242,9 +242,18 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
&transport->fl, sk);
}
- if (transport->dst) {
- transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
- } else
+ if (transport->param_flags & SPP_PMTUD_DISABLE) {
+ struct sctp_association *asoc = transport->asoc;
+
+ if (!transport->pathmtu && asoc && asoc->pathmtu)
+ transport->pathmtu = asoc->pathmtu;
+ if (transport->pathmtu)
+ return;
+ }
+
+ if (transport->dst)
+ transport->pathmtu = sctp_dst_mtu(transport->dst);
+ else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
@@ -290,6 +299,7 @@ void sctp_transport_route(struct sctp_transport *transport,
struct sctp_association *asoc = transport->asoc;
struct sctp_af *af = transport->af_specific;
+ sctp_transport_dst_release(transport);
af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));
if (saddr)
@@ -297,21 +307,14 @@ void sctp_transport_route(struct sctp_transport *transport,
else
af->get_saddr(opt, transport, &transport->fl);
- if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
- return;
- }
- if (transport->dst) {
- transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
+ sctp_transport_pmtu(transport, sctp_opt2sk(opt));
- /* Initialize sk->sk_rcv_saddr, if the transport is the
- * association's active path for getsockname().
- */
- if (asoc && (!asoc->peer.primary_path ||
- (transport == asoc->peer.active_path)))
- opt->pf->to_sk_saddr(&transport->saddr,
- asoc->base.sk);
- } else
- transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+ /* Initialize sk->sk_rcv_saddr, if the transport is the
+ * association's active path for getsockname().
+ */
+ if (transport->dst && asoc &&
+ (!asoc->peer.primary_path || transport == asoc->peer.active_path))
+ opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk);
}
/* Hold a reference to a transport. */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 544bab42f925..95b00e4c3396 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -29,6 +29,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
+#include <asm/ioctls.h>
#include "smc.h"
#include "smc_clc.h"
@@ -305,6 +306,7 @@ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
+ struct net *net = sock_net(smc->clcsock->sk);
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
int rest;
@@ -362,7 +364,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
if (rc < 0)
return SMC_CLC_DECL_TCL;
- link->state = SMC_LNK_ACTIVE;
+ smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
return 0;
}
@@ -400,6 +402,9 @@ static int smc_connect_rdma(struct smc_sock *smc)
sock_hold(&smc->sk); /* sock put in passive closing */
+ if (smc->use_fallback)
+ goto out_connected;
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -716,6 +721,7 @@ void smc_close_non_accepted(struct sock *sk)
static int smc_serv_conf_first_link(struct smc_sock *smc)
{
+ struct net *net = sock_net(smc->clcsock->sk);
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
int rest;
@@ -768,7 +774,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
return rc;
}
- link->state = SMC_LNK_ACTIVE;
+ smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
return 0;
}
@@ -792,6 +798,9 @@ static void smc_listen_work(struct work_struct *work)
int rc = 0;
u8 ibport;
+ if (new_smc->use_fallback)
+ goto out_connected;
+
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
new_smc->use_fallback = true;
@@ -965,7 +974,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
continue;
new_smc->listen_smc = lsmc;
- new_smc->use_fallback = false; /* assume rdma capability first*/
+ new_smc->use_fallback = lsmc->use_fallback;
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
@@ -1001,7 +1010,8 @@ static int smc_listen(struct socket *sock, int backlog)
* them to the clc socket -- copy smc socket options to clc socket
*/
smc_copy_sock_settings_to_clc(smc);
- tcp_sk(smc->clcsock->sk)->syn_smc = 1;
+ if (!smc->use_fallback)
+ tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_listen(smc->clcsock, backlog);
if (rc)
@@ -1034,6 +1044,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
if (lsmc->sk.sk_state != SMC_LISTEN) {
rc = -EINVAL;
+ release_sock(sk);
goto out;
}
@@ -1061,9 +1072,29 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
if (!rc)
rc = sock_error(nsk);
+ release_sock(sk);
+ if (rc)
+ goto out;
+
+ if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
+ /* wait till data arrives on the socket */
+ timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
+ MSEC_PER_SEC);
+ if (smc_sk(nsk)->use_fallback) {
+ struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
+
+ lock_sock(clcsk);
+ if (skb_queue_empty(&clcsk->sk_receive_queue))
+ sk_wait_data(clcsk, &timeo, NULL);
+ release_sock(clcsk);
+ } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
+ lock_sock(nsk);
+ smc_rx_wait_data(smc_sk(nsk), &timeo);
+ release_sock(nsk);
+ }
+ }
out:
- release_sock(sk);
sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1094,6 +1125,16 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
(sk->sk_state != SMC_APPCLOSEWAIT1) &&
(sk->sk_state != SMC_INIT))
goto out;
+
+ if (msg->msg_flags & MSG_FASTOPEN) {
+ if (sk->sk_state == SMC_INIT) {
+ smc->use_fallback = true;
+ } else {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+
if (smc->use_fallback)
rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
else
@@ -1273,14 +1314,64 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
+ int val, rc;
smc = smc_sk(sk);
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
- return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
+ optval, optlen);
+ if (smc->clcsock->sk->sk_err) {
+ sk->sk_err = smc->clcsock->sk->sk_err;
+ sk->sk_error_report(sk);
+ }
+ if (rc)
+ return rc;
+
+ if (optlen < sizeof(int))
+ return rc;
+ get_user(val, (int __user *)optval);
+
+ lock_sock(sk);
+ switch (optname) {
+ case TCP_ULP:
+ case TCP_FASTOPEN:
+ case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
+ case TCP_FASTOPEN_NO_COOKIE:
+ /* option not supported by SMC */
+ if (sk->sk_state == SMC_INIT) {
+ smc->use_fallback = true;
+ } else {
+ if (!smc->use_fallback)
+ rc = -EINVAL;
+ }
+ break;
+ case TCP_NODELAY:
+ if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+ if (val)
+ mod_delayed_work(system_wq, &smc->conn.tx_work,
+ 0);
+ }
+ break;
+ case TCP_CORK:
+ if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+ if (!val)
+ mod_delayed_work(system_wq, &smc->conn.tx_work,
+ 0);
+ }
+ break;
+ case TCP_DEFER_ACCEPT:
+ smc->sockopt_defer_accept = val;
+ break;
+ default:
+ break;
+ }
+ release_sock(sk);
+
+ return rc;
}
static int smc_getsockopt(struct socket *sock, int level, int optname,
@@ -1298,12 +1389,38 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
struct smc_sock *smc;
+ int answ;
smc = smc_sk(sock->sk);
- if (smc->use_fallback)
+ if (smc->use_fallback) {
+ if (!smc->clcsock)
+ return -EBADF;
return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
- else
- return sock_no_ioctl(sock, cmd, arg);
+ }
+ switch (cmd) {
+ case SIOCINQ: /* same as FIONREAD */
+ if (smc->sk.sk_state == SMC_LISTEN)
+ return -EINVAL;
+ answ = atomic_read(&smc->conn.bytes_to_rcv);
+ break;
+ case SIOCOUTQ:
+ /* output queue size (not send + not acked) */
+ if (smc->sk.sk_state == SMC_LISTEN)
+ return -EINVAL;
+ answ = smc->conn.sndbuf_size -
+ atomic_read(&smc->conn.sndbuf_space);
+ break;
+ case SIOCOUTQNSD:
+ /* output queue size (not send only) */
+ if (smc->sk.sk_state == SMC_LISTEN)
+ return -EINVAL;
+ answ = smc_tx_prepared_sends(&smc->conn);
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return put_user(answ, (int __user *)arg);
}
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index e4829a2f46ba..2405e889b93d 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -180,6 +180,10 @@ struct smc_sock { /* smc sock container */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
bool use_fallback; /* fallback to tcp */
+ int sockopt_defer_accept;
+ /* sockopt TCP_DEFER_ACCEPT
+ * value
+ */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
* started, waiting for unsent
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index b42395d24cba..42ad57365eca 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -82,7 +82,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
BUILD_BUG_ON_MSG(
- offsetof(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
+ sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
BUILD_BUG_ON_MSG(
sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index ab240b37ad11..d2012fd22100 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,7 +48,7 @@ struct smc_cdc_msg {
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 reserved[18];
-} __aligned(8);
+} __packed; /* format defined in RFC7609 */
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d4bd01bb44e1..79aa7d312d6b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -326,6 +326,7 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
+ smc_llc_link_flush(&lgr->lnk[SMC_SINGLE_LINK]);
smc_lgr_free_bufs(lgr);
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
kfree(lgr);
@@ -348,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
struct rb_node *node;
smc_lgr_forget(lgr);
+ smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
@@ -374,7 +376,8 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
- int rc = 0;
+ struct net_device *ndev;
+ int i, nest_lvl, rc = 0;
*vlan_id = 0;
if (!dst) {
@@ -386,8 +389,27 @@ static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
goto out_rel;
}
- if (is_vlan_dev(dst->dev))
- *vlan_id = vlan_dev_vlan_id(dst->dev);
+ ndev = dst->dev;
+ if (is_vlan_dev(ndev)) {
+ *vlan_id = vlan_dev_vlan_id(ndev);
+ goto out_rel;
+ }
+
+ rtnl_lock();
+ nest_lvl = dev_get_nest_level(ndev);
+ for (i = 0; i < nest_lvl; i++) {
+ struct list_head *lower = &ndev->adj_list.lower;
+
+ if (list_empty(lower))
+ break;
+ lower = lower->next;
+ ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
+ if (is_vlan_dev(ndev)) {
+ *vlan_id = vlan_dev_vlan_id(ndev);
+ break;
+ }
+ }
+ rtnl_unlock();
out_rel:
dst_release(dst);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 5dfcb15d529f..70ef4f47dc8d 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -79,6 +79,7 @@ struct smc_link {
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
+ unsigned long wr_rx_tstamp; /* jiffies when last buf rx */
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
@@ -101,6 +102,9 @@ struct smc_link {
int llc_confirm_resp_rc; /* rc from conf_resp msg */
struct completion llc_add; /* wait for rx of add link */
struct completion llc_add_resp; /* wait for rx of add link rsp*/
+ struct delayed_work llc_testlink_wrk; /* testlink worker */
+ struct completion llc_testlink_resp; /* wait for rx of testlink */
+ int llc_testlink_time; /* testlink interval */
};
/* For now we just allow one parallel link per link group. The SMC protocol
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 427b91c1c964..05dd7e6d314d 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -38,17 +38,27 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- r->diag_family = sk->sk_family;
if (!smc->clcsock)
return;
r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
r->id.idiag_dport = smc->clcsock->sk->sk_dport;
r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
sock_diag_save_cookie(sk, r->id.idiag_cookie);
- memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
- memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
- r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
- r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
+ if (sk->sk_protocol == SMCPROTO_SMC) {
+ r->diag_family = PF_INET;
+ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+ r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
+ r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_protocol == SMCPROTO_SMC6) {
+ r->diag_family = PF_INET6;
+ memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr,
+ sizeof(smc->clcsock->sk->sk_v6_rcv_saddr));
+ memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr,
+ sizeof(smc->clcsock->sk->sk_v6_daddr));
+#endif
+ }
}
static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
@@ -153,7 +163,8 @@ errout:
return -EMSGSIZE;
}
-static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb,
+ struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct nlattr *bc = NULL;
@@ -161,8 +172,8 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct sock *sk;
int rc = 0;
- read_lock(&smc_proto.h.smc_hash->lock);
- head = &smc_proto.h.smc_hash->ht;
+ read_lock(&prot->h.smc_hash->lock);
+ head = &prot->h.smc_hash->ht;
if (hlist_empty(head))
goto out;
@@ -175,7 +186,17 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
out:
- read_unlock(&smc_proto.h.smc_hash->lock);
+ read_unlock(&prot->h.smc_hash->lock);
+ return rc;
+}
+
+static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int rc = 0;
+
+ rc = smc_diag_dump_proto(&smc_proto, skb, cb);
+ if (!rc)
+ rc = smc_diag_dump_proto(&smc_proto6, skb, cb);
return rc;
}
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index ea4b21981b4b..33b4d856f4c6 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -397,7 +397,8 @@ static void smc_llc_rx_test_link(struct smc_link *link,
struct smc_llc_msg_test_link *llc)
{
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- /* unused as long as we don't send this type of msg */
+ if (link->state == SMC_LNK_ACTIVE)
+ complete(&link->llc_testlink_resp);
} else {
smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
}
@@ -502,6 +503,65 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
}
}
+/***************************** worker ****************************************/
+
+static void smc_llc_testlink_work(struct work_struct *work)
+{
+ struct smc_link *link = container_of(to_delayed_work(work),
+ struct smc_link, llc_testlink_wrk);
+ unsigned long next_interval;
+ struct smc_link_group *lgr;
+ unsigned long expire_time;
+ u8 user_data[16] = { 0 };
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+ if (link->state != SMC_LNK_ACTIVE)
+ return; /* don't reschedule worker */
+ expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
+ if (time_is_after_jiffies(expire_time)) {
+ next_interval = expire_time - jiffies;
+ goto out;
+ }
+ reinit_completion(&link->llc_testlink_resp);
+ smc_llc_send_test_link(link, user_data, SMC_LLC_REQ);
+ /* receive TEST LINK response over RoCE fabric */
+ rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rc <= 0) {
+ smc_lgr_terminate(lgr);
+ return;
+ }
+ next_interval = link->llc_testlink_time;
+out:
+ schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
+}
+
+void smc_llc_link_active(struct smc_link *link, int testlink_time)
+{
+ init_completion(&link->llc_testlink_resp);
+ INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
+ link->state = SMC_LNK_ACTIVE;
+ if (testlink_time) {
+ link->llc_testlink_time = testlink_time * HZ;
+ schedule_delayed_work(&link->llc_testlink_wrk,
+ link->llc_testlink_time);
+ }
+}
+
+/* called in tasklet context */
+void smc_llc_link_inactive(struct smc_link *link)
+{
+ link->state = SMC_LNK_INACTIVE;
+ cancel_delayed_work(&link->llc_testlink_wrk);
+}
+
+/* called in worker context */
+void smc_llc_link_flush(struct smc_link *link)
+{
+ cancel_delayed_work_sync(&link->llc_testlink_wrk);
+}
+
/***************************** init, exit, misc ******************************/
static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index e4a7d5e234d5..d6e42116485e 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -44,6 +44,9 @@ int smc_llc_send_delete_link(struct smc_link *link,
enum smc_llc_reqresp reqresp);
int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
enum smc_llc_reqresp reqresp);
+void smc_llc_link_active(struct smc_link *link, int testlink_time);
+void smc_llc_link_inactive(struct smc_link *link);
+void smc_llc_link_flush(struct smc_link *link);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index eff4e0d0bb31..af851d8df1f8 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -51,7 +51,7 @@ static void smc_rx_data_ready(struct sock *sk)
* 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
* 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
*/
-static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
+int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index 3a32b59bf06c..0b75a6b470e6 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -20,5 +20,6 @@
void smc_rx_init(struct smc_sock *smc);
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
int flags);
+int smc_rx_wait_data(struct smc_sock *smc, long *timeo);
#endif /* SMC_RX_H */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 72f004c9c9b1..58dfe0bd9d60 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -19,6 +19,7 @@
#include <linux/sched/signal.h>
#include <net/sock.h>
+#include <net/tcp.h>
#include "smc.h"
#include "smc_wr.h"
@@ -26,6 +27,7 @@
#include "smc_tx.h"
#define SMC_TX_WORK_DELAY HZ
+#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
/***************************** sndbuf producer *******************************/
@@ -115,6 +117,13 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
return rc;
}
+static bool smc_tx_is_corked(struct smc_sock *smc)
+{
+ struct tcp_sock *tp = tcp_sk(smc->clcsock->sk);
+
+ return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
+}
+
/* sndbuf producer: main API called by socket layer.
* called under sock lock.
*/
@@ -209,7 +218,16 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
/* since we just produced more new data into sndbuf,
* trigger sndbuf consumer: RDMA write into peer RMBE and CDC
*/
- smc_tx_sndbuf_nonempty(conn);
+ if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
+ (atomic_read(&conn->sndbuf_space) >
+ (conn->sndbuf_size >> 1)))
+ /* for a corked socket defer the RDMA writes if there
+ * is still sufficient sndbuf_space available
+ */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_CORK_DELAY);
+ else
+ smc_tx_sndbuf_nonempty(conn);
} /* while (msg_data_left(msg)) */
return send_done;
@@ -409,8 +427,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
}
rc = 0;
if (conn->alert_token_local) /* connection healthy */
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ mod_delayed_work(system_wq, &conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 1b8af23e6e2b..cc7c1bb60fe8 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -376,6 +376,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
for (i = 0; i < num; i++) {
link = wc[i].qp->qp_context;
if (wc[i].status == IB_WC_SUCCESS) {
+ link->wr_rx_tstamp = jiffies;
smc_wr_rx_demultiplex(&wc[i]);
smc_wr_rx_post(link); /* refill WR RX */
} else {
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index f7d47c89d658..2dfb492a7c94 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -697,6 +697,9 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
goto prop_msg_full;
+ if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
+ goto prop_msg_full;
nla_nest_end(msg->skb, prop);
@@ -979,12 +982,23 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_TOL]) {
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
- tipc_node_apply_tolerance(net, b);
+ tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL);
}
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ if (props[TIPC_NLA_PROP_MTU]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
+ return -EINVAL;
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ if (tipc_udp_mtu_bad(nla_get_u32
+ (props[TIPC_NLA_PROP_MTU])))
+ return -EINVAL;
+ b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+ tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
+#endif
+ }
}
return 0;
@@ -1029,6 +1043,9 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
goto prop_msg_full;
+ if (media->type_id == TIPC_MEDIA_TYPE_UDP)
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
+ goto prop_msg_full;
nla_nest_end(msg->skb, prop);
nla_nest_end(msg->skb, attrs);
@@ -1158,6 +1175,16 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ if (props[TIPC_NLA_PROP_MTU]) {
+ if (m->type_id != TIPC_MEDIA_TYPE_UDP)
+ return -EINVAL;
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ if (tipc_udp_mtu_bad(nla_get_u32
+ (props[TIPC_NLA_PROP_MTU])))
+ return -EINVAL;
+ m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+#endif
+ }
}
return 0;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6efcee63a381..394290cbbb1d 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -94,6 +94,8 @@ struct tipc_bearer;
* @priority: default link (and bearer) priority
* @tolerance: default time (in ms) before declaring link failure
* @window: default window (in packets) before declaring link congestion
+ * @mtu: max packet size bearer can support for media type not dependent on
+ * underlying device MTU
* @type_id: TIPC media identifier
* @hwaddr_len: TIPC media address len
* @name: media name
@@ -118,6 +120,7 @@ struct tipc_media {
u32 priority;
u32 tolerance;
u32 window;
+ u32 mtu;
u32 type_id;
u32 hwaddr_len;
char name[TIPC_MAX_MEDIA_NAME];
diff --git a/net/tipc/node.c b/net/tipc/node.c
index baaf93f12cbd..b0b840084f0a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -195,6 +195,27 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
return mtu;
}
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id)
+{
+ u8 *own_id = tipc_own_id(net);
+ struct tipc_node *n;
+
+ if (!own_id)
+ return true;
+
+ if (addr == tipc_own_addr(net)) {
+ memcpy(id, own_id, TIPC_NODEID_LEN);
+ return true;
+ }
+ n = tipc_node_find(net, addr);
+ if (!n)
+ return false;
+
+ memcpy(id, &n->peer_id, TIPC_NODEID_LEN);
+ tipc_node_put(n);
+ return true;
+}
+
u16 tipc_node_get_capabilities(struct net *net, u32 addr)
{
struct tipc_node *n;
@@ -1681,7 +1702,8 @@ discard:
kfree_skb(skb);
}
-void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
+ int prop)
{
struct tipc_net *tn = tipc_net(net);
int bearer_id = b->identity;
@@ -1696,8 +1718,13 @@ void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
list_for_each_entry_rcu(n, &tn->node_list, list) {
tipc_node_write_lock(n);
e = &n->links[bearer_id];
- if (e->link)
- tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+ if (e->link) {
+ if (prop == TIPC_NLA_PROP_TOL)
+ tipc_link_set_tolerance(e->link, b->tolerance,
+ &xmitq);
+ else if (prop == TIPC_NLA_PROP_MTU)
+ tipc_link_set_mtu(e->link, b->mtu);
+ }
tipc_node_write_unlock(n);
tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index f24b83500df1..846c8f240872 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -60,6 +60,7 @@ enum {
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
@@ -67,7 +68,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
-void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 252a52ae0893..c4992002fd68 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2973,7 +2973,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
- struct sock *sk = sock->sk;
+ struct net *net = sock_net(sock->sk);
+ struct tipc_sioc_nodeid_req nr = {0};
struct tipc_sioc_ln_req lnr;
void __user *argp = (void __user *)arg;
@@ -2981,7 +2982,7 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGETLINKNAME:
if (copy_from_user(&lnr, argp, sizeof(lnr)))
return -EFAULT;
- if (!tipc_node_get_linkname(sock_net(sk),
+ if (!tipc_node_get_linkname(net,
lnr.bearer_id & 0xffff, lnr.peer,
lnr.linkname, TIPC_MAX_LINK_NAME)) {
if (copy_to_user(argp, &lnr, sizeof(lnr)))
@@ -2989,6 +2990,14 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return 0;
}
return -EADDRNOTAVAIL;
+ case SIOCGETNODEID:
+ if (copy_from_user(&nr, argp, sizeof(nr)))
+ return -EFAULT;
+ if (!tipc_node_get_id(net, nr.peer, nr.node_id))
+ return -EADDRNOTAVAIL;
+ if (copy_to_user(argp, &nr, sizeof(nr)))
+ return -EFAULT;
+ return 0;
default:
return -ENOIOCTLCMD;
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e7d91f5d5cae..9783101bc4a9 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -713,8 +713,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
err = -EINVAL;
goto err;
}
- b->mtu = dev->mtu - sizeof(struct iphdr)
- - sizeof(struct udphdr);
+ b->mtu = b->media->mtu;
#if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
udp_conf.family = AF_INET6;
@@ -803,6 +802,7 @@ struct tipc_media udp_media_info = {
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
+ .mtu = TIPC_DEF_LINK_UDP_MTU,
.type_id = TIPC_MEDIA_TYPE_UDP,
.hwaddr_len = 0,
.name = "udp"
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
index 281bbae87726..e7455cc73e16 100644
--- a/net/tipc/udp_media.h
+++ b/net/tipc/udp_media.h
@@ -38,9 +38,23 @@
#ifndef _TIPC_UDP_MEDIA_H
#define _TIPC_UDP_MEDIA_H
+#include <linux/ip.h>
+#include <linux/udp.h>
+
int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb);
+/* check if configured MTU is too low for tipc headers */
+static inline bool tipc_udp_mtu_bad(u32 mtu)
+{
+ if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) +
+ sizeof(struct udphdr)))
+ return false;
+
+ pr_warn("MTU too low for tipc bearer\n");
+ return true;
+}
+
#endif
#endif
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 89b8745a986f..73f05ece53d0 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -14,3 +14,13 @@ config TLS
encryption handling of the TLS protocol to be done in-kernel.
If unsure, say N.
+
+config TLS_DEVICE
+ bool "Transport Layer Security HW offload"
+ depends on TLS
+ select SOCK_VALIDATE_XMIT
+ default n
+ help
+ Enable kernel support for HW offload of the TLS protocol.
+
+ If unsure, say N.
diff --git a/net/tls/Makefile b/net/tls/Makefile
index a930fd1c4f7b..4d6b728a67d0 100644
--- a/net/tls/Makefile
+++ b/net/tls/Makefile
@@ -5,3 +5,5 @@
obj-$(CONFIG_TLS) += tls.o
tls-y := tls_main.o tls_sw.o
+
+tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
new file mode 100644
index 000000000000..ac45d6224e41
--- /dev/null
+++ b/net/tls/tls_device.c
@@ -0,0 +1,764 @@
+/* Copyright (c) 2018, Mellanox Technologies All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <crypto/aead.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/dst.h>
+#include <net/inet_connection_sock.h>
+#include <net/tcp.h>
+#include <net/tls.h>
+
+/* device_offload_lock is used to synchronize tls_dev_add
+ * against NETDEV_DOWN notifications.
+ */
+static DECLARE_RWSEM(device_offload_lock);
+
+static void tls_device_gc_task(struct work_struct *work);
+
+static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
+static LIST_HEAD(tls_device_gc_list);
+static LIST_HEAD(tls_device_list);
+static DEFINE_SPINLOCK(tls_device_lock);
+
+static void tls_device_free_ctx(struct tls_context *ctx)
+{
+ struct tls_offload_context *offload_ctx = tls_offload_ctx(ctx);
+
+ kfree(offload_ctx);
+ kfree(ctx);
+}
+
+static void tls_device_gc_task(struct work_struct *work)
+{
+ struct tls_context *ctx, *tmp;
+ unsigned long flags;
+ LIST_HEAD(gc_list);
+
+ spin_lock_irqsave(&tls_device_lock, flags);
+ list_splice_init(&tls_device_gc_list, &gc_list);
+ spin_unlock_irqrestore(&tls_device_lock, flags);
+
+ list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
+ struct net_device *netdev = ctx->netdev;
+
+ if (netdev) {
+ netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+ TLS_OFFLOAD_CTX_DIR_TX);
+ dev_put(netdev);
+ }
+
+ list_del(&ctx->list);
+ tls_device_free_ctx(ctx);
+ }
+}
+
+static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tls_device_lock, flags);
+ list_move_tail(&ctx->list, &tls_device_gc_list);
+
+ /* schedule_work inside the spinlock
+ * to make sure tls_device_down waits for that work.
+ */
+ schedule_work(&tls_device_gc_work);
+
+ spin_unlock_irqrestore(&tls_device_lock, flags);
+}
+
+/* We assume that the socket is already connected */
+static struct net_device *get_netdev_for_sock(struct sock *sk)
+{
+ struct dst_entry *dst = sk_dst_get(sk);
+ struct net_device *netdev = NULL;
+
+ if (likely(dst)) {
+ netdev = dst->dev;
+ dev_hold(netdev);
+ }
+
+ dst_release(dst);
+
+ return netdev;
+}
+
+static void destroy_record(struct tls_record_info *record)
+{
+ int nr_frags = record->num_frags;
+ skb_frag_t *frag;
+
+ while (nr_frags-- > 0) {
+ frag = &record->frags[nr_frags];
+ __skb_frag_unref(frag);
+ }
+ kfree(record);
+}
+
+static void delete_all_records(struct tls_offload_context *offload_ctx)
+{
+ struct tls_record_info *info, *temp;
+
+ list_for_each_entry_safe(info, temp, &offload_ctx->records_list, list) {
+ list_del(&info->list);
+ destroy_record(info);
+ }
+
+ offload_ctx->retransmit_hint = NULL;
+}
+
+static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_record_info *info, *temp;
+ struct tls_offload_context *ctx;
+ u64 deleted_records = 0;
+ unsigned long flags;
+
+ if (!tls_ctx)
+ return;
+
+ ctx = tls_offload_ctx(tls_ctx);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ info = ctx->retransmit_hint;
+ if (info && !before(acked_seq, info->end_seq)) {
+ ctx->retransmit_hint = NULL;
+ list_del(&info->list);
+ destroy_record(info);
+ deleted_records++;
+ }
+
+ list_for_each_entry_safe(info, temp, &ctx->records_list, list) {
+ if (before(acked_seq, info->end_seq))
+ break;
+ list_del(&info->list);
+
+ destroy_record(info);
+ deleted_records++;
+ }
+
+ ctx->unacked_record_sn += deleted_records;
+ spin_unlock_irqrestore(&ctx->lock, flags);
+}
+
+/* At this point, there should be no references on this
+ * socket and no in-flight SKBs associated with this
+ * socket, so it is safe to free all the resources.
+ */
+void tls_device_sk_destruct(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+
+ if (ctx->open_record)
+ destroy_record(ctx->open_record);
+
+ delete_all_records(ctx);
+ crypto_free_aead(ctx->aead_send);
+ ctx->sk_destruct(sk);
+ clean_acked_data_disable(inet_csk(sk));
+
+ if (refcount_dec_and_test(&tls_ctx->refcount))
+ tls_device_queue_ctx_destruction(tls_ctx);
+}
+EXPORT_SYMBOL(tls_device_sk_destruct);
+
+static void tls_append_frag(struct tls_record_info *record,
+ struct page_frag *pfrag,
+ int size)
+{
+ skb_frag_t *frag;
+
+ frag = &record->frags[record->num_frags - 1];
+ if (frag->page.p == pfrag->page &&
+ frag->page_offset + frag->size == pfrag->offset) {
+ frag->size += size;
+ } else {
+ ++frag;
+ frag->page.p = pfrag->page;
+ frag->page_offset = pfrag->offset;
+ frag->size = size;
+ ++record->num_frags;
+ get_page(pfrag->page);
+ }
+
+ pfrag->offset += size;
+ record->len += size;
+}
+
+static int tls_push_record(struct sock *sk,
+ struct tls_context *ctx,
+ struct tls_offload_context *offload_ctx,
+ struct tls_record_info *record,
+ struct page_frag *pfrag,
+ int flags,
+ unsigned char record_type)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct page_frag dummy_tag_frag;
+ skb_frag_t *frag;
+ int i;
+
+ /* fill prepend */
+ frag = &record->frags[0];
+ tls_fill_prepend(ctx,
+ skb_frag_address(frag),
+ record->len - ctx->tx.prepend_size,
+ record_type);
+
+ /* HW doesn't care about the data in the tag, because it fills it. */
+ dummy_tag_frag.page = skb_frag_page(frag);
+ dummy_tag_frag.offset = 0;
+
+ tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size);
+ record->end_seq = tp->write_seq + record->len;
+ spin_lock_irq(&offload_ctx->lock);
+ list_add_tail(&record->list, &offload_ctx->records_list);
+ spin_unlock_irq(&offload_ctx->lock);
+ offload_ctx->open_record = NULL;
+ set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+ tls_advance_record_sn(sk, &ctx->tx);
+
+ for (i = 0; i < record->num_frags; i++) {
+ frag = &record->frags[i];
+ sg_unmark_end(&offload_ctx->sg_tx_data[i]);
+ sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
+ frag->size, frag->page_offset);
+ sk_mem_charge(sk, frag->size);
+ get_page(skb_frag_page(frag));
+ }
+ sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
+
+ /* all ready, send */
+ return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
+}
+
+static int tls_create_new_record(struct tls_offload_context *offload_ctx,
+ struct page_frag *pfrag,
+ size_t prepend_size)
+{
+ struct tls_record_info *record;
+ skb_frag_t *frag;
+
+ record = kmalloc(sizeof(*record), GFP_KERNEL);
+ if (!record)
+ return -ENOMEM;
+
+ frag = &record->frags[0];
+ __skb_frag_set_page(frag, pfrag->page);
+ frag->page_offset = pfrag->offset;
+ skb_frag_size_set(frag, prepend_size);
+
+ get_page(pfrag->page);
+ pfrag->offset += prepend_size;
+
+ record->num_frags = 1;
+ record->len = prepend_size;
+ offload_ctx->open_record = record;
+ return 0;
+}
+
+static int tls_do_allocation(struct sock *sk,
+ struct tls_offload_context *offload_ctx,
+ struct page_frag *pfrag,
+ size_t prepend_size)
+{
+ int ret;
+
+ if (!offload_ctx->open_record) {
+ if (unlikely(!skb_page_frag_refill(prepend_size, pfrag,
+ sk->sk_allocation))) {
+ sk->sk_prot->enter_memory_pressure(sk);
+ sk_stream_moderate_sndbuf(sk);
+ return -ENOMEM;
+ }
+
+ ret = tls_create_new_record(offload_ctx, pfrag, prepend_size);
+ if (ret)
+ return ret;
+
+ if (pfrag->size > pfrag->offset)
+ return 0;
+ }
+
+ if (!sk_page_frag_refill(sk, pfrag))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int tls_push_data(struct sock *sk,
+ struct iov_iter *msg_iter,
+ size_t size, int flags,
+ unsigned char record_type)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+ int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+ int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
+ struct tls_record_info *record = ctx->open_record;
+ struct page_frag *pfrag;
+ size_t orig_size = size;
+ u32 max_open_record_len;
+ int copy, rc = 0;
+ bool done = false;
+ long timeo;
+
+ if (flags &
+ ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
+ return -ENOTSUPP;
+
+ if (sk->sk_err)
+ return -sk->sk_err;
+
+ timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo);
+ if (rc < 0)
+ return rc;
+
+ pfrag = sk_page_frag(sk);
+
+ /* TLS_HEADER_SIZE is not counted as part of the TLS record, and
+ * we need to leave room for an authentication tag.
+ */
+ max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
+ tls_ctx->tx.prepend_size;
+ do {
+ rc = tls_do_allocation(sk, ctx, pfrag,
+ tls_ctx->tx.prepend_size);
+ if (rc) {
+ rc = sk_stream_wait_memory(sk, &timeo);
+ if (!rc)
+ continue;
+
+ record = ctx->open_record;
+ if (!record)
+ break;
+handle_error:
+ if (record_type != TLS_RECORD_TYPE_DATA) {
+ /* avoid sending partial
+ * record with type !=
+ * application_data
+ */
+ size = orig_size;
+ destroy_record(record);
+ ctx->open_record = NULL;
+ } else if (record->len > tls_ctx->tx.prepend_size) {
+ goto last_record;
+ }
+
+ break;
+ }
+
+ record = ctx->open_record;
+ copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
+ copy = min_t(size_t, copy, (max_open_record_len - record->len));
+
+ if (copy_from_iter_nocache(page_address(pfrag->page) +
+ pfrag->offset,
+ copy, msg_iter) != copy) {
+ rc = -EFAULT;
+ goto handle_error;
+ }
+ tls_append_frag(record, pfrag, copy);
+
+ size -= copy;
+ if (!size) {
+last_record:
+ tls_push_record_flags = flags;
+ if (more) {
+ tls_ctx->pending_open_record_frags =
+ record->num_frags;
+ break;
+ }
+
+ done = true;
+ }
+
+ if (done || record->len >= max_open_record_len ||
+ (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+ rc = tls_push_record(sk,
+ tls_ctx,
+ ctx,
+ record,
+ pfrag,
+ tls_push_record_flags,
+ record_type);
+ if (rc < 0)
+ break;
+ }
+ } while (!done);
+
+ if (orig_size - size > 0)
+ rc = orig_size - size;
+
+ return rc;
+}
+
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ unsigned char record_type = TLS_RECORD_TYPE_DATA;
+ int rc;
+
+ lock_sock(sk);
+
+ if (unlikely(msg->msg_controllen)) {
+ rc = tls_proccess_cmsg(sk, msg, &record_type);
+ if (rc)
+ goto out;
+ }
+
+ rc = tls_push_data(sk, &msg->msg_iter, size,
+ msg->msg_flags, record_type);
+
+out:
+ release_sock(sk);
+ return rc;
+}
+
+int tls_device_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct iov_iter msg_iter;
+ char *kaddr = kmap(page);
+ struct kvec iov;
+ int rc;
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ flags |= MSG_MORE;
+
+ lock_sock(sk);
+
+ if (flags & MSG_OOB) {
+ rc = -ENOTSUPP;
+ goto out;
+ }
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+ rc = tls_push_data(sk, &msg_iter, size,
+ flags, TLS_RECORD_TYPE_DATA);
+ kunmap(page);
+
+out:
+ release_sock(sk);
+ return rc;
+}
+
+struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+ u32 seq, u64 *p_record_sn)
+{
+ u64 record_sn = context->hint_record_sn;
+ struct tls_record_info *info;
+
+ info = context->retransmit_hint;
+ if (!info ||
+ before(seq, info->end_seq - info->len)) {
+ /* if retransmit_hint is irrelevant start
+ * from the beggining of the list
+ */
+ info = list_first_entry(&context->records_list,
+ struct tls_record_info, list);
+ record_sn = context->unacked_record_sn;
+ }
+
+ list_for_each_entry_from(info, &context->records_list, list) {
+ if (before(seq, info->end_seq)) {
+ if (!context->retransmit_hint ||
+ after(info->end_seq,
+ context->retransmit_hint->end_seq)) {
+ context->hint_record_sn = record_sn;
+ context->retransmit_hint = info;
+ }
+ *p_record_sn = record_sn;
+ return info;
+ }
+ record_sn++;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(tls_get_record);
+
+static int tls_device_push_pending_record(struct sock *sk, int flags)
+{
+ struct iov_iter msg_iter;
+
+ iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+ return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+}
+
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+{
+ u16 nonce_size, tag_size, iv_size, rec_seq_size;
+ struct tls_record_info *start_marker_record;
+ struct tls_offload_context *offload_ctx;
+ struct tls_crypto_info *crypto_info;
+ struct net_device *netdev;
+ char *iv, *rec_seq;
+ struct sk_buff *skb;
+ int rc = -EINVAL;
+ __be64 rcd_sn;
+
+ if (!ctx)
+ goto out;
+
+ if (ctx->priv_ctx_tx) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
+ if (!start_marker_record) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE, GFP_KERNEL);
+ if (!offload_ctx) {
+ rc = -ENOMEM;
+ goto free_marker_record;
+ }
+
+ crypto_info = &ctx->crypto_send;
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
+ tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
+ iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
+ rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
+ rec_seq =
+ ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
+ break;
+ default:
+ rc = -EINVAL;
+ goto free_offload_ctx;
+ }
+
+ ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
+ ctx->tx.tag_size = tag_size;
+ ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
+ ctx->tx.iv_size = iv_size;
+ ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ GFP_KERNEL);
+ if (!ctx->tx.iv) {
+ rc = -ENOMEM;
+ goto free_offload_ctx;
+ }
+
+ memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+
+ ctx->tx.rec_seq_size = rec_seq_size;
+ ctx->tx.rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+ if (!ctx->tx.rec_seq) {
+ rc = -ENOMEM;
+ goto free_iv;
+ }
+ memcpy(ctx->tx.rec_seq, rec_seq, rec_seq_size);
+
+ rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info);
+ if (rc)
+ goto free_rec_seq;
+
+ /* start at rec_seq - 1 to account for the start marker record */
+ memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn));
+ offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1;
+
+ start_marker_record->end_seq = tcp_sk(sk)->write_seq;
+ start_marker_record->len = 0;
+ start_marker_record->num_frags = 0;
+
+ INIT_LIST_HEAD(&offload_ctx->records_list);
+ list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
+ spin_lock_init(&offload_ctx->lock);
+
+ clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked);
+ ctx->push_pending_record = tls_device_push_pending_record;
+ offload_ctx->sk_destruct = sk->sk_destruct;
+
+ /* TLS offload is greatly simplified if we don't send
+ * SKBs where only part of the payload needs to be encrypted.
+ * So mark the last skb in the write queue as end of record.
+ */
+ skb = tcp_write_queue_tail(sk);
+ if (skb)
+ TCP_SKB_CB(skb)->eor = 1;
+
+ refcount_set(&ctx->refcount, 1);
+
+ /* We support starting offload on multiple sockets
+ * concurrently, so we only need a read lock here.
+ * This lock must precede get_netdev_for_sock to prevent races between
+ * NETDEV_DOWN and setsockopt.
+ */
+ down_read(&device_offload_lock);
+ netdev = get_netdev_for_sock(sk);
+ if (!netdev) {
+ pr_err_ratelimited("%s: netdev not found\n", __func__);
+ rc = -EINVAL;
+ goto release_lock;
+ }
+
+ if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
+ rc = -ENOTSUPP;
+ goto release_netdev;
+ }
+
+ /* Avoid offloading if the device is down
+ * We don't want to offload new flows after
+ * the NETDEV_DOWN event
+ */
+ if (!(netdev->flags & IFF_UP)) {
+ rc = -EINVAL;
+ goto release_netdev;
+ }
+
+ ctx->priv_ctx_tx = offload_ctx;
+ rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX,
+ &ctx->crypto_send,
+ tcp_sk(sk)->write_seq);
+ if (rc)
+ goto release_netdev;
+
+ ctx->netdev = netdev;
+
+ spin_lock_irq(&tls_device_lock);
+ list_add_tail(&ctx->list, &tls_device_list);
+ spin_unlock_irq(&tls_device_lock);
+
+ sk->sk_validate_xmit_skb = tls_validate_xmit_skb;
+ /* following this assignment tls_is_sk_tx_device_offloaded
+ * will return true and the context might be accessed
+ * by the netdev's xmit function.
+ */
+ smp_store_release(&sk->sk_destruct,
+ &tls_device_sk_destruct);
+ up_read(&device_offload_lock);
+ goto out;
+
+release_netdev:
+ dev_put(netdev);
+release_lock:
+ up_read(&device_offload_lock);
+ clean_acked_data_disable(inet_csk(sk));
+ crypto_free_aead(offload_ctx->aead_send);
+free_rec_seq:
+ kfree(ctx->tx.rec_seq);
+free_iv:
+ kfree(ctx->tx.iv);
+free_offload_ctx:
+ kfree(offload_ctx);
+ ctx->priv_ctx_tx = NULL;
+free_marker_record:
+ kfree(start_marker_record);
+out:
+ return rc;
+}
+
+static int tls_device_down(struct net_device *netdev)
+{
+ struct tls_context *ctx, *tmp;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ /* Request a write lock to block new offload attempts */
+ down_write(&device_offload_lock);
+
+ spin_lock_irqsave(&tls_device_lock, flags);
+ list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) {
+ if (ctx->netdev != netdev ||
+ !refcount_inc_not_zero(&ctx->refcount))
+ continue;
+
+ list_move(&ctx->list, &list);
+ }
+ spin_unlock_irqrestore(&tls_device_lock, flags);
+
+ list_for_each_entry_safe(ctx, tmp, &list, list) {
+ netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+ TLS_OFFLOAD_CTX_DIR_TX);
+ ctx->netdev = NULL;
+ dev_put(netdev);
+ list_del_init(&ctx->list);
+
+ if (refcount_dec_and_test(&ctx->refcount))
+ tls_device_free_ctx(ctx);
+ }
+
+ up_write(&device_offload_lock);
+
+ flush_work(&tls_device_gc_work);
+
+ return NOTIFY_DONE;
+}
+
+static int tls_dev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (!(dev->features & NETIF_F_HW_TLS_TX))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ case NETDEV_FEAT_CHANGE:
+ if (dev->tlsdev_ops &&
+ dev->tlsdev_ops->tls_dev_add &&
+ dev->tlsdev_ops->tls_dev_del)
+ return NOTIFY_DONE;
+ else
+ return NOTIFY_BAD;
+ case NETDEV_DOWN:
+ return tls_device_down(dev);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block tls_dev_notifier = {
+ .notifier_call = tls_dev_event,
+};
+
+void __init tls_device_init(void)
+{
+ register_netdevice_notifier(&tls_dev_notifier);
+}
+
+void __exit tls_device_cleanup(void)
+{
+ unregister_netdevice_notifier(&tls_dev_notifier);
+ flush_work(&tls_device_gc_work);
+}
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
new file mode 100644
index 000000000000..748914abdb60
--- /dev/null
+++ b/net/tls/tls_device_fallback.c
@@ -0,0 +1,450 @@
+/* Copyright (c) 2018, Mellanox Technologies All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tls.h>
+#include <crypto/aead.h>
+#include <crypto/scatterwalk.h>
+#include <net/ip6_checksum.h>
+
+static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk)
+{
+ struct scatterlist *src = walk->sg;
+ int diff = walk->offset - src->offset;
+
+ sg_set_page(sg, sg_page(src),
+ src->length - diff, walk->offset);
+
+ scatterwalk_crypto_chain(sg, sg_next(src), 0, 2);
+}
+
+static int tls_enc_record(struct aead_request *aead_req,
+ struct crypto_aead *aead, char *aad,
+ char *iv, __be64 rcd_sn,
+ struct scatter_walk *in,
+ struct scatter_walk *out, int *in_len)
+{
+ unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE];
+ struct scatterlist sg_in[3];
+ struct scatterlist sg_out[3];
+ u16 len;
+ int rc;
+
+ len = min_t(int, *in_len, ARRAY_SIZE(buf));
+
+ scatterwalk_copychunks(buf, in, len, 0);
+ scatterwalk_copychunks(buf, out, len, 1);
+
+ *in_len -= len;
+ if (!*in_len)
+ return 0;
+
+ scatterwalk_pagedone(in, 0, 1);
+ scatterwalk_pagedone(out, 1, 1);
+
+ len = buf[4] | (buf[3] << 8);
+ len -= TLS_CIPHER_AES_GCM_128_IV_SIZE;
+
+ tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE,
+ (char *)&rcd_sn, sizeof(rcd_sn), buf[0]);
+
+ memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE,
+ TLS_CIPHER_AES_GCM_128_IV_SIZE);
+
+ sg_init_table(sg_in, ARRAY_SIZE(sg_in));
+ sg_init_table(sg_out, ARRAY_SIZE(sg_out));
+ sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE);
+ sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE);
+ chain_to_walk(sg_in + 1, in);
+ chain_to_walk(sg_out + 1, out);
+
+ *in_len -= len;
+ if (*in_len < 0) {
+ *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ /* the input buffer doesn't contain the entire record.
+ * trim len accordingly. The resulting authentication tag
+ * will contain garbage, but we don't care, so we won't
+ * include any of it in the output skb
+ * Note that we assume the output buffer length
+ * is larger then input buffer length + tag size
+ */
+ if (*in_len < 0)
+ len += *in_len;
+
+ *in_len = 0;
+ }
+
+ if (*in_len) {
+ scatterwalk_copychunks(NULL, in, len, 2);
+ scatterwalk_pagedone(in, 0, 1);
+ scatterwalk_copychunks(NULL, out, len, 2);
+ scatterwalk_pagedone(out, 1, 1);
+ }
+
+ len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
+
+ rc = crypto_aead_encrypt(aead_req);
+
+ return rc;
+}
+
+static void tls_init_aead_request(struct aead_request *aead_req,
+ struct crypto_aead *aead)
+{
+ aead_request_set_tfm(aead_req, aead);
+ aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+}
+
+static struct aead_request *tls_alloc_aead_request(struct crypto_aead *aead,
+ gfp_t flags)
+{
+ unsigned int req_size = sizeof(struct aead_request) +
+ crypto_aead_reqsize(aead);
+ struct aead_request *aead_req;
+
+ aead_req = kzalloc(req_size, flags);
+ if (aead_req)
+ tls_init_aead_request(aead_req, aead);
+ return aead_req;
+}
+
+static int tls_enc_records(struct aead_request *aead_req,
+ struct crypto_aead *aead, struct scatterlist *sg_in,
+ struct scatterlist *sg_out, char *aad, char *iv,
+ u64 rcd_sn, int len)
+{
+ struct scatter_walk out, in;
+ int rc;
+
+ scatterwalk_start(&in, sg_in);
+ scatterwalk_start(&out, sg_out);
+
+ do {
+ rc = tls_enc_record(aead_req, aead, aad, iv,
+ cpu_to_be64(rcd_sn), &in, &out, &len);
+ rcd_sn++;
+
+ } while (rc == 0 && len);
+
+ scatterwalk_done(&in, 0, 0);
+ scatterwalk_done(&out, 1, 0);
+
+ return rc;
+}
+
+/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses
+ * might have been changed by NAT.
+ */
+static void update_chksum(struct sk_buff *skb, int headln)
+{
+ struct tcphdr *th = tcp_hdr(skb);
+ int datalen = skb->len - headln;
+ const struct ipv6hdr *ipv6h;
+ const struct iphdr *iph;
+
+ /* We only changed the payload so if we are using partial we don't
+ * need to update anything.
+ */
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
+ return;
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (skb->sk->sk_family == AF_INET6) {
+ ipv6h = ipv6_hdr(skb);
+ th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, IPPROTO_TCP, 0);
+ } else {
+ iph = ip_hdr(skb);
+ th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
+ IPPROTO_TCP, 0);
+ }
+}
+
+static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
+{
+ skb_copy_header(nskb, skb);
+
+ skb_put(nskb, skb->len);
+ memcpy(nskb->data, skb->data, headln);
+ update_chksum(nskb, headln);
+
+ nskb->destructor = skb->destructor;
+ nskb->sk = skb->sk;
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ refcount_add(nskb->truesize - skb->truesize,
+ &nskb->sk->sk_wmem_alloc);
+}
+
+/* This function may be called after the user socket is already
+ * closed so make sure we don't use anything freed during
+ * tls_sk_proto_close here
+ */
+
+static int fill_sg_in(struct scatterlist *sg_in,
+ struct sk_buff *skb,
+ struct tls_offload_context *ctx,
+ u64 *rcd_sn,
+ s32 *sync_size,
+ int *resync_sgs)
+{
+ int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int payload_len = skb->len - tcp_payload_offset;
+ u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ struct tls_record_info *record;
+ unsigned long flags;
+ int remaining;
+ int i;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ record = tls_get_record(ctx, tcp_seq, rcd_sn);
+ if (!record) {
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ WARN(1, "Record not found for seq %u\n", tcp_seq);
+ return -EINVAL;
+ }
+
+ *sync_size = tcp_seq - tls_record_start_seq(record);
+ if (*sync_size < 0) {
+ int is_start_marker = tls_record_is_start_marker(record);
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ /* This should only occur if the relevant record was
+ * already acked. In that case it should be ok
+ * to drop the packet and avoid retransmission.
+ *
+ * There is a corner case where the packet contains
+ * both an acked and a non-acked record.
+ * We currently don't handle that case and rely
+ * on TCP to retranmit a packet that doesn't contain
+ * already acked payload.
+ */
+ if (!is_start_marker)
+ *sync_size = 0;
+ return -EINVAL;
+ }
+
+ remaining = *sync_size;
+ for (i = 0; remaining > 0; i++) {
+ skb_frag_t *frag = &record->frags[i];
+
+ __skb_frag_ref(frag);
+ sg_set_page(sg_in + i, skb_frag_page(frag),
+ skb_frag_size(frag), frag->page_offset);
+
+ remaining -= skb_frag_size(frag);
+
+ if (remaining < 0)
+ sg_in[i].length += remaining;
+ }
+ *resync_sgs = i;
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ if (skb_to_sgvec(skb, &sg_in[i], tcp_payload_offset, payload_len) < 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
+ struct tls_context *tls_ctx,
+ struct sk_buff *nskb,
+ int tcp_payload_offset,
+ int payload_len,
+ int sync_size,
+ void *dummy_buf)
+{
+ sg_set_buf(&sg_out[0], dummy_buf, sync_size);
+ sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
+ /* Add room for authentication tag produced by crypto */
+ dummy_buf += sync_size;
+ sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+}
+
+static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
+ struct scatterlist sg_out[3],
+ struct scatterlist *sg_in,
+ struct sk_buff *skb,
+ s32 sync_size, u64 rcd_sn)
+{
+ int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+ int payload_len = skb->len - tcp_payload_offset;
+ void *buf, *iv, *aad, *dummy_buf;
+ struct aead_request *aead_req;
+ struct sk_buff *nskb = NULL;
+ int buf_len;
+
+ aead_req = tls_alloc_aead_request(ctx->aead_send, GFP_ATOMIC);
+ if (!aead_req)
+ return NULL;
+
+ buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE +
+ TLS_CIPHER_AES_GCM_128_IV_SIZE +
+ TLS_AAD_SPACE_SIZE +
+ sync_size +
+ TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ buf = kmalloc(buf_len, GFP_ATOMIC);
+ if (!buf)
+ goto free_req;
+
+ iv = buf;
+ memcpy(iv, tls_ctx->crypto_send_aes_gcm_128.salt,
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE +
+ TLS_CIPHER_AES_GCM_128_IV_SIZE;
+ dummy_buf = aad + TLS_AAD_SPACE_SIZE;
+
+ nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
+ if (!nskb)
+ goto free_buf;
+
+ skb_reserve(nskb, skb_headroom(skb));
+
+ fill_sg_out(sg_out, buf, tls_ctx, nskb, tcp_payload_offset,
+ payload_len, sync_size, dummy_buf);
+
+ if (tls_enc_records(aead_req, ctx->aead_send, sg_in, sg_out, aad, iv,
+ rcd_sn, sync_size + payload_len) < 0)
+ goto free_nskb;
+
+ complete_skb(nskb, skb, tcp_payload_offset);
+
+ /* validate_xmit_skb_list assumes that if the skb wasn't segmented
+ * nskb->prev will point to the skb itself
+ */
+ nskb->prev = nskb;
+
+free_buf:
+ kfree(buf);
+free_req:
+ kfree(aead_req);
+ return nskb;
+free_nskb:
+ kfree_skb(nskb);
+ nskb = NULL;
+ goto free_buf;
+}
+
+static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
+{
+ int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+ int payload_len = skb->len - tcp_payload_offset;
+ struct scatterlist *sg_in, sg_out[3];
+ struct sk_buff *nskb = NULL;
+ int sg_in_max_elements;
+ int resync_sgs = 0;
+ s32 sync_size = 0;
+ u64 rcd_sn;
+
+ /* worst case is:
+ * MAX_SKB_FRAGS in tls_record_info
+ * MAX_SKB_FRAGS + 1 in SKB head and frags.
+ */
+ sg_in_max_elements = 2 * MAX_SKB_FRAGS + 1;
+
+ if (!payload_len)
+ return skb;
+
+ sg_in = kmalloc_array(sg_in_max_elements, sizeof(*sg_in), GFP_ATOMIC);
+ if (!sg_in)
+ goto free_orig;
+
+ sg_init_table(sg_in, sg_in_max_elements);
+ sg_init_table(sg_out, ARRAY_SIZE(sg_out));
+
+ if (fill_sg_in(sg_in, skb, ctx, &rcd_sn, &sync_size, &resync_sgs)) {
+ /* bypass packets before kernel TLS socket option was set */
+ if (sync_size < 0 && payload_len <= -sync_size)
+ nskb = skb_get(skb);
+ goto put_sg;
+ }
+
+ nskb = tls_enc_skb(tls_ctx, sg_out, sg_in, skb, sync_size, rcd_sn);
+
+put_sg:
+ while (resync_sgs)
+ put_page(sg_page(&sg_in[--resync_sgs]));
+ kfree(sg_in);
+free_orig:
+ kfree_skb(skb);
+ return nskb;
+}
+
+struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb)
+{
+ if (dev == tls_get_ctx(sk)->netdev)
+ return skb;
+
+ return tls_sw_fallback(sk, skb);
+}
+
+int tls_sw_fallback_init(struct sock *sk,
+ struct tls_offload_context *offload_ctx,
+ struct tls_crypto_info *crypto_info)
+{
+ const u8 *key;
+ int rc;
+
+ offload_ctx->aead_send =
+ crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(offload_ctx->aead_send)) {
+ rc = PTR_ERR(offload_ctx->aead_send);
+ pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
+ offload_ctx->aead_send = NULL;
+ goto err_out;
+ }
+
+ key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
+
+ rc = crypto_aead_setkey(offload_ctx->aead_send, key,
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ if (rc)
+ goto free_aead;
+
+ rc = crypto_aead_setauthsize(offload_ctx->aead_send,
+ TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ if (rc)
+ goto free_aead;
+
+ return 0;
+free_aead:
+ crypto_free_aead(offload_ctx->aead_send);
+err_out:
+ return rc;
+}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index cc03e00785c7..4b57ddd72f34 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -51,12 +51,12 @@ enum {
TLSV6,
TLS_NUM_PROTS,
};
-
enum {
TLS_BASE,
- TLS_SW_TX,
- TLS_SW_RX,
- TLS_SW_RXTX,
+ TLS_SW,
+#ifdef CONFIG_TLS_DEVICE
+ TLS_HW,
+#endif
TLS_HW_RECORD,
TLS_NUM_CONFIG,
};
@@ -65,14 +65,14 @@ static struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_mutex);
-static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
+static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
static struct proto_ops tls_sw_proto_ops;
-static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
{
int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
- sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
+ sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf];
}
int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -252,10 +252,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
- if (ctx->conf == TLS_HW_RECORD)
+ if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD)
goto skip_tx_cleanup;
- if (ctx->conf == TLS_BASE) {
+ if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) {
kfree(ctx);
ctx = NULL;
goto skip_tx_cleanup;
@@ -277,15 +277,26 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
}
}
- kfree(ctx->tx.rec_seq);
- kfree(ctx->tx.iv);
- kfree(ctx->rx.rec_seq);
- kfree(ctx->rx.iv);
+ /* We need these for tls_sw_fallback handling of other packets */
+ if (ctx->tx_conf == TLS_SW) {
+ kfree(ctx->tx.rec_seq);
+ kfree(ctx->tx.iv);
+ tls_sw_free_resources_tx(sk);
+ }
- if (ctx->conf == TLS_SW_TX ||
- ctx->conf == TLS_SW_RX ||
- ctx->conf == TLS_SW_RXTX) {
- tls_sw_free_resources(sk);
+ if (ctx->rx_conf == TLS_SW) {
+ kfree(ctx->rx.rec_seq);
+ kfree(ctx->rx.iv);
+ tls_sw_free_resources_rx(sk);
+ }
+
+#ifdef CONFIG_TLS_DEVICE
+ if (ctx->tx_conf != TLS_HW) {
+#else
+ {
+#endif
+ kfree(ctx);
+ ctx = NULL;
}
skip_tx_cleanup:
@@ -294,7 +305,8 @@ skip_tx_cleanup:
/* free ctx for TLS_HW_RECORD, used by tcp_set_state
* for sk->sk_prot->unhash [tls_hw_unhash]
*/
- if (ctx && ctx->conf == TLS_HW_RECORD)
+ if (ctx && ctx->tx_conf == TLS_HW_RECORD &&
+ ctx->rx_conf == TLS_HW_RECORD)
kfree(ctx);
}
@@ -448,25 +460,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto err_crypto_info;
}
- /* currently SW is default, we will have ethtool in future */
if (tx) {
- rc = tls_set_sw_offload(sk, ctx, 1);
- if (ctx->conf == TLS_SW_RX)
- conf = TLS_SW_RXTX;
- else
- conf = TLS_SW_TX;
+#ifdef CONFIG_TLS_DEVICE
+ rc = tls_set_device_offload(sk, ctx);
+ conf = TLS_HW;
+ if (rc) {
+#else
+ {
+#endif
+ rc = tls_set_sw_offload(sk, ctx, 1);
+ conf = TLS_SW;
+ }
} else {
rc = tls_set_sw_offload(sk, ctx, 0);
- if (ctx->conf == TLS_SW_TX)
- conf = TLS_SW_RXTX;
- else
- conf = TLS_SW_RX;
+ conf = TLS_SW;
}
if (rc)
goto err_crypto_info;
- ctx->conf = conf;
+ if (tx)
+ ctx->tx_conf = conf;
+ else
+ ctx->rx_conf = conf;
update_sk_prot(sk, ctx);
if (tx) {
ctx->sk_write_space = sk->sk_write_space;
@@ -542,7 +558,8 @@ static int tls_hw_prot(struct sock *sk)
ctx->hash = sk->sk_prot->hash;
ctx->unhash = sk->sk_prot->unhash;
ctx->sk_proto_close = sk->sk_prot->close;
- ctx->conf = TLS_HW_RECORD;
+ ctx->rx_conf = TLS_HW_RECORD;
+ ctx->tx_conf = TLS_HW_RECORD;
update_sk_prot(sk, ctx);
rc = 1;
break;
@@ -586,29 +603,40 @@ static int tls_hw_hash(struct sock *sk)
return err;
}
-static void build_protos(struct proto *prot, struct proto *base)
+static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+ struct proto *base)
{
- prot[TLS_BASE] = *base;
- prot[TLS_BASE].setsockopt = tls_setsockopt;
- prot[TLS_BASE].getsockopt = tls_getsockopt;
- prot[TLS_BASE].close = tls_sk_proto_close;
-
- prot[TLS_SW_TX] = prot[TLS_BASE];
- prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
- prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
-
- prot[TLS_SW_RX] = prot[TLS_BASE];
- prot[TLS_SW_RX].recvmsg = tls_sw_recvmsg;
- prot[TLS_SW_RX].close = tls_sk_proto_close;
-
- prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
- prot[TLS_SW_RXTX].recvmsg = tls_sw_recvmsg;
- prot[TLS_SW_RXTX].close = tls_sk_proto_close;
-
- prot[TLS_HW_RECORD] = *base;
- prot[TLS_HW_RECORD].hash = tls_hw_hash;
- prot[TLS_HW_RECORD].unhash = tls_hw_unhash;
- prot[TLS_HW_RECORD].close = tls_sk_proto_close;
+ prot[TLS_BASE][TLS_BASE] = *base;
+ prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt;
+ prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt;
+ prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close;
+
+ prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+ prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg;
+ prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage;
+
+ prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
+ prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg;
+ prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close;
+
+ prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
+ prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg;
+ prot[TLS_SW][TLS_SW].close = tls_sk_proto_close;
+
+#ifdef CONFIG_TLS_DEVICE
+ prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+ prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg;
+ prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage;
+
+ prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
+ prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg;
+ prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage;
+#endif
+
+ prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].close = tls_sk_proto_close;
}
static int tls_init(struct sock *sk)
@@ -639,7 +667,7 @@ static int tls_init(struct sock *sk)
ctx->getsockopt = sk->sk_prot->getsockopt;
ctx->sk_proto_close = sk->sk_prot->close;
- /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
+ /* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
if (ip_ver == TLSV6 &&
unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
mutex_lock(&tcpv6_prot_mutex);
@@ -650,7 +678,8 @@ static int tls_init(struct sock *sk)
mutex_unlock(&tcpv6_prot_mutex);
}
- ctx->conf = TLS_BASE;
+ ctx->tx_conf = TLS_BASE;
+ ctx->rx_conf = TLS_BASE;
update_sk_prot(sk, ctx);
out:
return rc;
@@ -688,6 +717,9 @@ static int __init tls_register(void)
tls_sw_proto_ops.poll = tls_sw_poll;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
+#ifdef CONFIG_TLS_DEVICE
+ tls_device_init();
+#endif
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
@@ -696,6 +728,9 @@ static int __init tls_register(void)
static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
+#ifdef CONFIG_TLS_DEVICE
+ tls_device_cleanup();
+#endif
}
module_init(tls_register);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 71e79597f940..5c3909c311f1 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -52,7 +52,7 @@ static int tls_do_decryption(struct sock *sk,
gfp_t flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm = strp_msg(skb);
struct aead_request *aead_req;
@@ -122,7 +122,7 @@ out:
static void trim_both_sgl(struct sock *sk, int target_size)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
trim_sg(sk, ctx->sg_plaintext_data,
&ctx->sg_plaintext_num_elem,
@@ -141,7 +141,7 @@ static void trim_both_sgl(struct sock *sk, int target_size)
static int alloc_encrypted_sg(struct sock *sk, int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
int rc = 0;
rc = sk_alloc_sg(sk, len,
@@ -155,7 +155,7 @@ static int alloc_encrypted_sg(struct sock *sk, int len)
static int alloc_plaintext_sg(struct sock *sk, int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
int rc = 0;
rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
@@ -181,7 +181,7 @@ static void free_sg(struct sock *sk, struct scatterlist *sg,
static void tls_free_both_sg(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem,
&ctx->sg_encrypted_size);
@@ -191,7 +191,7 @@ static void tls_free_both_sg(struct sock *sk)
}
static int tls_do_encryption(struct tls_context *tls_ctx,
- struct tls_sw_context *ctx, size_t data_len,
+ struct tls_sw_context_tx *ctx, size_t data_len,
gfp_t flags)
{
unsigned int req_size = sizeof(struct aead_request) +
@@ -227,7 +227,7 @@ static int tls_push_record(struct sock *sk, int flags,
unsigned char record_type)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
int rc;
sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
@@ -339,7 +339,7 @@ static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
int bytes)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct scatterlist *sg = ctx->sg_plaintext_data;
int copy, i, rc = 0;
@@ -367,7 +367,7 @@ out:
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
int ret = 0;
int required_size;
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -522,7 +522,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
int ret = 0;
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
bool eor;
@@ -636,7 +636,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
long timeo, int *err)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_buff *skb;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -674,7 +674,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgout)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE];
struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
struct scatterlist *sgin = &sgin_arr[0];
@@ -693,8 +693,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
if (!sgout) {
nsg = skb_cow_data(skb, 0, &unused) + 1;
sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
- if (!sgout)
- sgout = sgin;
+ sgout = sgin;
}
sg_init_table(sgin, nsg);
@@ -724,7 +723,7 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
unsigned int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm = strp_msg(skb);
if (len < rxm->full_len) {
@@ -750,7 +749,7 @@ int tls_sw_recvmsg(struct sock *sk,
int *addr_len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
unsigned char control;
struct strp_msg *rxm;
struct sk_buff *skb;
@@ -870,7 +869,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
size_t len, unsigned int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm = NULL;
struct sock *sk = sock->sk;
struct sk_buff *skb;
@@ -923,7 +922,7 @@ unsigned int tls_sw_poll(struct file *file, struct socket *sock,
unsigned int ret;
struct sock *sk = sock->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
/* Grab POLLOUT and POLLHUP from the underlying socket */
ret = ctx->sk_poll(file, sock, wait);
@@ -939,7 +938,7 @@ unsigned int tls_sw_poll(struct file *file, struct socket *sock,
static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
char header[tls_ctx->rx.prepend_size];
struct strp_msg *rxm = strp_msg(skb);
size_t cipher_overhead;
@@ -988,7 +987,7 @@ read_failure:
static void tls_queue(struct strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm;
rxm = strp_msg(skb);
@@ -1004,18 +1003,28 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
static void tls_data_ready(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
strp_data_ready(&ctx->strp);
}
-void tls_sw_free_resources(struct sock *sk)
+void tls_sw_free_resources_tx(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
if (ctx->aead_send)
crypto_free_aead(ctx->aead_send);
+ tls_free_both_sg(sk);
+
+ kfree(ctx);
+}
+
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
if (ctx->aead_recv) {
if (ctx->recv_pkt) {
kfree_skb(ctx->recv_pkt);
@@ -1031,10 +1040,7 @@ void tls_sw_free_resources(struct sock *sk)
lock_sock(sk);
}
- tls_free_both_sg(sk);
-
kfree(ctx);
- kfree(tls_ctx);
}
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
@@ -1042,7 +1048,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
struct tls_crypto_info *crypto_info;
struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
- struct tls_sw_context *sw_ctx;
+ struct tls_sw_context_tx *sw_ctx_tx = NULL;
+ struct tls_sw_context_rx *sw_ctx_rx = NULL;
struct cipher_context *cctx;
struct crypto_aead **aead;
struct strp_callbacks cb;
@@ -1055,27 +1062,32 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
goto out;
}
- if (!ctx->priv_ctx) {
- sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
- if (!sw_ctx) {
+ if (tx) {
+ sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+ if (!sw_ctx_tx) {
rc = -ENOMEM;
goto out;
}
- crypto_init_wait(&sw_ctx->async_wait);
+ crypto_init_wait(&sw_ctx_tx->async_wait);
+ ctx->priv_ctx_tx = sw_ctx_tx;
} else {
- sw_ctx = ctx->priv_ctx;
+ sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+ if (!sw_ctx_rx) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ crypto_init_wait(&sw_ctx_rx->async_wait);
+ ctx->priv_ctx_rx = sw_ctx_rx;
}
- ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
-
if (tx) {
crypto_info = &ctx->crypto_send;
cctx = &ctx->tx;
- aead = &sw_ctx->aead_send;
+ aead = &sw_ctx_tx->aead_send;
} else {
crypto_info = &ctx->crypto_recv;
cctx = &ctx->rx;
- aead = &sw_ctx->aead_recv;
+ aead = &sw_ctx_rx->aead_recv;
}
switch (crypto_info->cipher_type) {
@@ -1122,22 +1134,24 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
}
memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
- if (tx) {
- sg_init_table(sw_ctx->sg_encrypted_data,
- ARRAY_SIZE(sw_ctx->sg_encrypted_data));
- sg_init_table(sw_ctx->sg_plaintext_data,
- ARRAY_SIZE(sw_ctx->sg_plaintext_data));
-
- sg_init_table(sw_ctx->sg_aead_in, 2);
- sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
- sizeof(sw_ctx->aad_space));
- sg_unmark_end(&sw_ctx->sg_aead_in[1]);
- sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
- sg_init_table(sw_ctx->sg_aead_out, 2);
- sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
- sizeof(sw_ctx->aad_space));
- sg_unmark_end(&sw_ctx->sg_aead_out[1]);
- sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+ if (sw_ctx_tx) {
+ sg_init_table(sw_ctx_tx->sg_encrypted_data,
+ ARRAY_SIZE(sw_ctx_tx->sg_encrypted_data));
+ sg_init_table(sw_ctx_tx->sg_plaintext_data,
+ ARRAY_SIZE(sw_ctx_tx->sg_plaintext_data));
+
+ sg_init_table(sw_ctx_tx->sg_aead_in, 2);
+ sg_set_buf(&sw_ctx_tx->sg_aead_in[0], sw_ctx_tx->aad_space,
+ sizeof(sw_ctx_tx->aad_space));
+ sg_unmark_end(&sw_ctx_tx->sg_aead_in[1]);
+ sg_chain(sw_ctx_tx->sg_aead_in, 2,
+ sw_ctx_tx->sg_plaintext_data);
+ sg_init_table(sw_ctx_tx->sg_aead_out, 2);
+ sg_set_buf(&sw_ctx_tx->sg_aead_out[0], sw_ctx_tx->aad_space,
+ sizeof(sw_ctx_tx->aad_space));
+ sg_unmark_end(&sw_ctx_tx->sg_aead_out[1]);
+ sg_chain(sw_ctx_tx->sg_aead_out, 2,
+ sw_ctx_tx->sg_encrypted_data);
}
if (!*aead) {
@@ -1162,22 +1176,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
if (rc)
goto free_aead;
- if (!tx) {
+ if (sw_ctx_rx) {
/* Set up strparser */
memset(&cb, 0, sizeof(cb));
cb.rcv_msg = tls_queue;
cb.parse_msg = tls_read_size;
- strp_init(&sw_ctx->strp, sk, &cb);
+ strp_init(&sw_ctx_rx->strp, sk, &cb);
write_lock_bh(&sk->sk_callback_lock);
- sw_ctx->saved_data_ready = sk->sk_data_ready;
+ sw_ctx_rx->saved_data_ready = sk->sk_data_ready;
sk->sk_data_ready = tls_data_ready;
write_unlock_bh(&sk->sk_callback_lock);
- sw_ctx->sk_poll = sk->sk_socket->ops->poll;
+ sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll;
- strp_check_rcv(&sw_ctx->strp);
+ strp_check_rcv(&sw_ctx_rx->strp);
}
goto out;
@@ -1189,11 +1203,16 @@ free_rec_seq:
kfree(cctx->rec_seq);
cctx->rec_seq = NULL;
free_iv:
- kfree(ctx->tx.iv);
- ctx->tx.iv = NULL;
+ kfree(cctx->iv);
+ cctx->iv = NULL;
free_priv:
- kfree(ctx->priv_ctx);
- ctx->priv_ctx = NULL;
+ if (tx) {
+ kfree(ctx->priv_ctx_tx);
+ ctx->priv_ctx_tx = NULL;
+ } else {
+ kfree(ctx->priv_ctx_rx);
+ ctx->priv_ctx_rx = NULL;
+ }
out:
return rc;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6edd4bf6..b853581592fd 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
hostprogs-y += cpustat
+hostprogs-y += xdp_adjust_tail
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
+xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -112,7 +114,6 @@ always += sock_flags_kern.o
always += test_probe_write_user_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
-always += tcbpf2_kern.o
always += tc_l2_redirect_kern.o
always += lathist_kern.o
always += offwaketime_kern.o
@@ -148,6 +149,7 @@ always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
always += cpustat_kern.o
+always += xdp_adjust_tail_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -193,6 +195,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
HOSTLOADLIBES_cpustat += -lelf
+HOSTLOADLIBES_xdp_adjust_tail += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index bebe4188b4b3..feca497d6afd 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -549,7 +549,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
if (nr_maps < 0) {
printf("Error: Failed loading ELF maps (errno:%d):%s\n",
nr_maps, strerror(-nr_maps));
- ret = 1;
goto done;
}
if (load_maps(map_data, nr_maps, fixup_map))
@@ -615,7 +614,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
}
}
- ret = 0;
done:
close(fd);
return ret;
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index 6fc6e193ef1b..33a637507c00 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -9,10 +9,10 @@
* if (value)
* (*(u64*)value) += 1;
*
- * - attaches this program to eth0 raw socket
+ * - attaches this program to loopback interface "lo" raw socket
*
* - every second user space reads map[tcp], map[udp], map[icmp] to see
- * how many packets of given protocol were seen on eth0
+ * how many packets of given protocol were seen on "lo"
*/
#include <stdio.h>
#include <unistd.h>
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
deleted file mode 100755
index c265863ccdf9..000000000000
--- a/samples/bpf/test_tunnel_bpf.sh
+++ /dev/null
@@ -1,319 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# In Namespace 0 (at_ns0) using native tunnel
-# Overlay IP: 10.1.1.100
-# local 192.16.1.100 remote 192.16.1.200
-# veth0 IP: 172.16.1.100, tunnel dev <type>00
-
-# Out of Namespace using BPF set/get on lwtunnel
-# Overlay IP: 10.1.1.200
-# local 172.16.1.200 remote 172.16.1.100
-# veth1 IP: 172.16.1.200, tunnel dev <type>11
-
-function config_device {
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-function add_gre_tunnel {
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE key 2 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6gretap_tunnel {
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
- local ::11 remote ::22
-
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # out of namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV fc80::200/24
- ip link set dev $DEV up
-}
-
-function add_erspan_tunnel {
- # in namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 2 erspan_dir egress erspan_hwid 3
- fi
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6erspan_tunnel {
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # in namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 2 erspan_dir egress erspan_hwid 7
- fi
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # out of namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-function add_vxlan_tunnel {
- # Set static ARP entry here because iptables set-mark works
- # on L3 packet, as a result not applying to ARP packets,
- # causing errors at get_tunnel_{key/opt}.
-
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
- ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
- # out of namespace
- ip link add dev $DEV type $TYPE external gbp dstport 4789
- ip link set dev $DEV address 52:54:00:d9:02:00 up
- ip addr add dev $DEV 10.1.1.200/24
- arp -s 10.1.1.100 52:54:00:d9:01:00
-}
-
-function add_geneve_tunnel {
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ipip_tunnel {
- # in namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # out of namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-function attach_bpf {
- DEV=$1
- SET_TUNNEL=$2
- GET_TUNNEL=$3
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
- tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
-}
-
-function test_gre {
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_ip6gre {
- TYPE=ip6gre
- DEV_NS=ip6gre00
- DEV=ip6gre11
- config_device
- # reuse the ip6gretap function
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 -c 4 ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ping -c 1 10.1.1.100
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 -c 1 fc80::200
- cleanup
-}
-
-function test_ip6gretap {
- TYPE=ip6gretap
- DEV_NS=ip6gretap00
- DEV=ip6gretap11
- config_device
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 -c 4 ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200
- ping -c 1 10.1.1.100
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 -c 1 fc80::200
- cleanup
-}
-
-function test_erspan {
- TYPE=erspan
- DEV_NS=erspan00
- DEV=erspan11
- config_device
- add_erspan_tunnel $1
- attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_ip6erspan {
- TYPE=ip6erspan
- DEV_NS=ip6erspan00
- DEV=ip6erspan11
- config_device
- add_ip6erspan_tunnel $1
- attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
- ping6 -c 3 ::11
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_vxlan {
- TYPE=vxlan
- DEV_NS=vxlan00
- DEV=vxlan11
- config_device
- add_vxlan_tunnel
- attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_geneve {
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- cleanup
-}
-
-function test_ipip {
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- config_device
- tcpdump -nei veth1 &
- cat /sys/kernel/debug/tracing/trace_pipe &
- add_ipip_tunnel
- ethtool -K veth1 gso off gro off rx off tx off
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping -c 1 10.1.1.100
- ip netns exec at_ns0 ping -c 1 10.1.1.200
- ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
- sleep 0.2
- iperf -c 10.1.1.100 -n 5k -p 5200
- cleanup
-}
-
-function cleanup {
- set +ex
- pkill iperf
- ip netns delete at_ns0
- ip link del veth1
- ip link del ipip11
- ip link del gretap11
- ip link del ip6gre11
- ip link del ip6gretap11
- ip link del vxlan11
- ip link del geneve11
- ip link del erspan11
- ip link del ip6erspan11
- pkill tcpdump
- pkill cat
- set -ex
-}
-
-trap cleanup 0 2 3 6 9
-cleanup
-echo "Testing GRE tunnel..."
-test_gre
-echo "Testing IP6GRE tunnel..."
-test_ip6gre
-echo "Testing IP6GRETAP tunnel..."
-test_ip6gretap
-echo "Testing ERSPAN tunnel..."
-test_erspan v1
-test_erspan v2
-echo "Testing IP6ERSPAN tunnel..."
-test_ip6erspan v1
-test_ip6erspan v2
-echo "Testing VXLAN tunnel..."
-test_vxlan
-echo "Testing GENEVE tunnel..."
-test_geneve
-echo "Testing IPIP tunnel..."
-test_ipip
-echo "*** PASS ***"
diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c
new file mode 100644
index 000000000000..411fdb21f8bc
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_kern.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program shows how to use bpf_xdp_adjust_tail() by
+ * generating ICMPv4 "packet to big" (unreachable/ df bit set frag needed
+ * to be more preice in case of v4)" where receiving packets bigger then
+ * 600 bytes.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include "bpf_helpers.h"
+
+#define DEFAULT_TTL 64
+#define MAX_PCKT_SIZE 600
+#define ICMP_TOOBIG_SIZE 98
+#define ICMP_TOOBIG_PAYLOAD_SIZE 92
+
+struct bpf_map_def SEC("maps") icmpcnt = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+static __always_inline void count_icmp(void)
+{
+ u64 key = 0;
+ u64 *icmp_count;
+
+ icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
+ if (icmp_count)
+ *icmp_count += 1;
+}
+
+static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
+{
+ struct ethhdr *eth;
+
+ eth = data;
+ memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN);
+ memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN);
+ eth->h_proto = orig_eth->h_proto;
+}
+
+static __always_inline __u16 csum_fold_helper(__u32 csum)
+{
+ return ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline void ipv4_csum(void *data_start, int data_size,
+ __u32 *csum)
+{
+ *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum);
+ *csum = csum_fold_helper(*csum);
+}
+
+static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
+{
+ int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);
+
+ if (bpf_xdp_adjust_head(xdp, 0 - headroom))
+ return XDP_DROP;
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+
+ if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
+ return XDP_DROP;
+
+ struct iphdr *iph, *orig_iph;
+ struct icmphdr *icmp_hdr;
+ struct ethhdr *orig_eth;
+ __u32 csum = 0;
+ __u64 off = 0;
+
+ orig_eth = data + headroom;
+ swap_mac(data, orig_eth);
+ off += sizeof(struct ethhdr);
+ iph = data + off;
+ off += sizeof(struct iphdr);
+ icmp_hdr = data + off;
+ off += sizeof(struct icmphdr);
+ orig_iph = data + off;
+ icmp_hdr->type = ICMP_DEST_UNREACH;
+ icmp_hdr->code = ICMP_FRAG_NEEDED;
+ icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
+ icmp_hdr->checksum = 0;
+ ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
+ icmp_hdr->checksum = csum;
+ iph->ttl = DEFAULT_TTL;
+ iph->daddr = orig_iph->saddr;
+ iph->saddr = orig_iph->daddr;
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->protocol = IPPROTO_ICMP;
+ iph->tos = 0;
+ iph->tot_len = htons(
+ ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
+ iph->check = 0;
+ csum = 0;
+ ipv4_csum(iph, sizeof(struct iphdr), &csum);
+ iph->check = csum;
+ count_icmp();
+ return XDP_TX;
+}
+
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ int pckt_size = data_end - data;
+ int offset;
+
+ if (pckt_size > MAX_PCKT_SIZE) {
+ offset = pckt_size - ICMP_TOOBIG_SIZE;
+ if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+ return XDP_PASS;
+ return send_icmp4_too_big(xdp);
+ }
+ return XDP_PASS;
+}
+
+SEC("xdp_icmp")
+int _xdp_icmp(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+ else
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
new file mode 100644
index 000000000000..f621a541b574
--- /dev/null
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define STATS_INTERVAL_S 2U
+
+static int ifindex = -1;
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+ if (ifindex > -1)
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ exit(0);
+}
+
+/* simple "icmp packet too big sent" counter
+ */
+static void poll_stats(unsigned int kill_after_s)
+{
+ time_t started_at = time(NULL);
+ __u64 value = 0;
+ int key = 0;
+
+
+ while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+ sleep(STATS_INTERVAL_S);
+
+ assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
+
+ printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
+ }
+}
+
+static void usage(const char *cmd)
+{
+ printf("Start a XDP prog which send ICMP \"packet too big\" \n"
+ "messages if ingress packet is bigger then MAX_SIZE bytes\n");
+ printf("Usage: %s [...]\n", cmd);
+ printf(" -i <ifindex> Interface Index\n");
+ printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
+ printf(" -S use skb-mode\n");
+ printf(" -N enforce native mode\n");
+ printf(" -h Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned char opt_flags[256] = {};
+ unsigned int kill_after_s = 0;
+ const char *optstr = "i:T:SNh";
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256];
+ int opt;
+ int i;
+
+
+ for (i = 0; i < strlen(optstr); i++)
+ if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
+ opt_flags[(unsigned char)optstr[i]] = 1;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+
+ switch (opt) {
+ case 'i':
+ ifindex = atoi(optarg);
+ break;
+ case 'T':
+ kill_after_s = atoi(optarg);
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ break;
+ default:
+ usage(argv[0]);
+ return 1;
+ }
+ opt_flags[opt] = 0;
+ }
+
+ for (i = 0; i < strlen(optstr); i++) {
+ if (opt_flags[(unsigned int)optstr[i]]) {
+ fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+ return 1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (!prog_fd[0]) {
+ printf("load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ printf("link set xdp fd failed\n");
+ return 1;
+ }
+
+ poll_stats(kill_after_s);
+
+ bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index eec14520d513..894bc64c2cac 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -330,7 +330,7 @@ static void stats_print(struct stats_record *stats_rec,
pps = calc_pps_u64(r, p, t);
if (pps > 0)
printf(fmt1, "Exception", i,
- 0.0, pps, err2str(rec_i));
+ 0.0, pps, action2str(rec_i));
}
pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
deleted file mode 100644
index fa53f4d77834..000000000000
--- a/samples/sockmap/Makefile
+++ /dev/null
@@ -1,78 +0,0 @@
-# List of programs to build
-hostprogs-y := sockmap
-
-# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
-
-HOSTCFLAGS += -I$(objtree)/usr/include
-HOSTCFLAGS += -I$(srctree)/tools/lib/
-HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-HOSTCFLAGS += -I$(srctree)/tools/perf
-
-sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-always += sockmap_kern.o
-
-HOSTLOADLIBES_sockmap += -lelf -lpthread
-
-# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
-LLC ?= llc
-CLANG ?= clang
-
-# Trick to allow make to be run from this directory
-all:
- $(MAKE) -C ../../ $(CURDIR)/
-
-clean:
- $(MAKE) -C ../../ M=$(CURDIR) clean
- @rm -f *~
-
-$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
- $(call if_changed_dep,cc_s_c)
-
-$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
- $(call filechk,offsets,__SYSCALL_NRS_H__)
-
-clean-files += syscall_nrs.h
-
-FORCE:
-
-
-# Verify LLVM compiler tools are available and bpf target is supported by llc
-.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
-
-verify_cmds: $(CLANG) $(LLC)
- @for TOOL in $^ ; do \
- if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
- echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
- exit 1; \
- else true; fi; \
- done
-
-verify_target_bpf: verify_cmds
- @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
- echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
- echo " NOTICE: LLVM version >= 3.7.1 required" ;\
- exit 2; \
- else true; fi
-
-$(src)/*.c: verify_target_bpf
-
-# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
-# But, there is no easy way to fix it, so just exclude it since it is
-# useless for BPF samples.
-#
-# -target bpf option required with SK_MSG programs, this is to ensure
-# reading 'void *' data types for data and data_end are __u64 reads.
-$(obj)/%.o: $(src)/%.c
- $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
- -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
- -Wno-compare-distinct-pointer-types \
- -Wno-gnu-variable-sized-type-not-at-end \
- -Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option -O2 -target bpf \
- -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
deleted file mode 100755
index ace75f070eb8..000000000000
--- a/samples/sockmap/sockmap_test.sh
+++ /dev/null
@@ -1,488 +0,0 @@
-#Test a bunch of positive cases to verify basic functionality
-for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
- for i in 1 10 100; do
- for l in 1 10 100; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
- done
- done
-done
-done
-done
-
-#Test max iov
-t="sendmsg"
-r=1
-i=1024
-l=1
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-
-# Test max iov with 1k send
-
-t="sendmsg"
-r=1
-i=1024
-l=1024
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-
-# Test apply with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply and redirect with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply and redirect with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply and redirect with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with 1B not really useful but test it anyways
-r=1
-i=1024
-l=1024
-prog="--txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_redir --txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# mix and match cork and apply not really useful but valid programs
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Tests for bpf_msg_pull_data()
-for i in `seq 99 100 1600`; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
- echo $TEST
- $TEST
- sleep 2
-done
-
-for i in `seq 199 100 1600`; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
- echo $TEST
- $TEST
- sleep 2
-done
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
-echo $TEST
-$TEST
-sleep 2
-
-# Run through gamut again with start and end
-for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
- for i in 1 10 100; do
- for l in 1 10 100; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
- echo $TEST
- $TEST
- sleep 2
- done
- done
-done
-done
-done
-
-# Some specific tests to cover specific code paths
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
new file mode 100755
index 000000000000..30ba0fee36e4
--- /dev/null
+++ b/scripts/bpf_helpers_doc.py
@@ -0,0 +1,421 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Copyright (C) 2018 Netronome Systems, Inc.
+
+# In case user attempts to run with Python 2.
+from __future__ import print_function
+
+import argparse
+import re
+import sys, os
+
+class NoHelperFound(BaseException):
+ pass
+
+class ParsingError(BaseException):
+ def __init__(self, line='<line not provided>', reader=None):
+ if reader:
+ BaseException.__init__(self,
+ 'Error at file offset %d, parsing line: %s' %
+ (reader.tell(), line))
+ else:
+ BaseException.__init__(self, 'Error parsing line: %s' % line)
+
+class Helper(object):
+ """
+ An object representing the description of an eBPF helper function.
+ @proto: function prototype of the helper function
+ @desc: textual description of the helper function
+ @ret: description of the return value of the helper function
+ """
+ def __init__(self, proto='', desc='', ret=''):
+ self.proto = proto
+ self.desc = desc
+ self.ret = ret
+
+ def proto_break_down(self):
+ """
+ Break down helper function protocol into smaller chunks: return type,
+ name, distincts arguments.
+ """
+ arg_re = re.compile('^((const )?(struct )?(\w+|...))( (\**)(\w+))?$')
+ res = {}
+ proto_re = re.compile('^(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
+
+ capture = proto_re.match(self.proto)
+ res['ret_type'] = capture.group(1)
+ res['ret_star'] = capture.group(2)
+ res['name'] = capture.group(3)
+ res['args'] = []
+
+ args = capture.group(4).split(', ')
+ for a in args:
+ capture = arg_re.match(a)
+ res['args'].append({
+ 'type' : capture.group(1),
+ 'star' : capture.group(6),
+ 'name' : capture.group(7)
+ })
+
+ return res
+
+class HeaderParser(object):
+ """
+ An object used to parse a file in order to extract the documentation of a
+ list of eBPF helper functions. All the helpers that can be retrieved are
+ stored as Helper object, in the self.helpers() array.
+ @filename: name of file to parse, usually include/uapi/linux/bpf.h in the
+ kernel tree
+ """
+ def __init__(self, filename):
+ self.reader = open(filename, 'r')
+ self.line = ''
+ self.helpers = []
+
+ def parse_helper(self):
+ proto = self.parse_proto()
+ desc = self.parse_desc()
+ ret = self.parse_ret()
+ return Helper(proto=proto, desc=desc, ret=ret)
+
+ def parse_proto(self):
+ # Argument can be of shape:
+ # - "void"
+ # - "type name"
+ # - "type *name"
+ # - Same as above, with "const" and/or "struct" in front of type
+ # - "..." (undefined number of arguments, for bpf_trace_printk())
+ # There is at least one term ("void"), and at most five arguments.
+ p = re.compile('^ \* ((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
+ capture = p.match(self.line)
+ if not capture:
+ raise NoHelperFound
+ self.line = self.reader.readline()
+ return capture.group(1)
+
+ def parse_desc(self):
+ p = re.compile('^ \* \tDescription$')
+ capture = p.match(self.line)
+ if not capture:
+ # Helper can have empty description and we might be parsing another
+ # attribute: return but do not consume.
+ return ''
+ # Description can be several lines, some of them possibly empty, and it
+ # stops when another subsection title is met.
+ desc = ''
+ while True:
+ self.line = self.reader.readline()
+ if self.line == ' *\n':
+ desc += '\n'
+ else:
+ p = re.compile('^ \* \t\t(.*)')
+ capture = p.match(self.line)
+ if capture:
+ desc += capture.group(1) + '\n'
+ else:
+ break
+ return desc
+
+ def parse_ret(self):
+ p = re.compile('^ \* \tReturn$')
+ capture = p.match(self.line)
+ if not capture:
+ # Helper can have empty retval and we might be parsing another
+ # attribute: return but do not consume.
+ return ''
+ # Return value description can be several lines, some of them possibly
+ # empty, and it stops when another subsection title is met.
+ ret = ''
+ while True:
+ self.line = self.reader.readline()
+ if self.line == ' *\n':
+ ret += '\n'
+ else:
+ p = re.compile('^ \* \t\t(.*)')
+ capture = p.match(self.line)
+ if capture:
+ ret += capture.group(1) + '\n'
+ else:
+ break
+ return ret
+
+ def run(self):
+ # Advance to start of helper function descriptions.
+ offset = self.reader.read().find('* Start of BPF helper function descriptions:')
+ if offset == -1:
+ raise Exception('Could not find start of eBPF helper descriptions list')
+ self.reader.seek(offset)
+ self.reader.readline()
+ self.reader.readline()
+ self.line = self.reader.readline()
+
+ while True:
+ try:
+ helper = self.parse_helper()
+ self.helpers.append(helper)
+ except NoHelperFound:
+ break
+
+ self.reader.close()
+ print('Parsed description of %d helper function(s)' % len(self.helpers),
+ file=sys.stderr)
+
+###############################################################################
+
+class Printer(object):
+ """
+ A generic class for printers. Printers should be created with an array of
+ Helper objects, and implement a way to print them in the desired fashion.
+ @helpers: array of Helper objects to print to standard output
+ """
+ def __init__(self, helpers):
+ self.helpers = helpers
+
+ def print_header(self):
+ pass
+
+ def print_footer(self):
+ pass
+
+ def print_one(self, helper):
+ pass
+
+ def print_all(self):
+ self.print_header()
+ for helper in self.helpers:
+ self.print_one(helper)
+ self.print_footer()
+
+class PrinterRST(Printer):
+ """
+ A printer for dumping collected information about helpers as a ReStructured
+ Text page compatible with the rst2man program, which can be used to
+ generate a manual page for the helpers.
+ @helpers: array of Helper objects to print to standard output
+ """
+ def print_header(self):
+ header = '''\
+.. Copyright (C) All BPF authors and contributors from 2014 to present.
+.. See git log include/uapi/linux/bpf.h in kernel tree for details.
+..
+.. %%%LICENSE_START(VERBATIM)
+.. Permission is granted to make and distribute verbatim copies of this
+.. manual provided the copyright notice and this permission notice are
+.. preserved on all copies.
+..
+.. Permission is granted to copy and distribute modified versions of this
+.. manual under the conditions for verbatim copying, provided that the
+.. entire resulting derived work is distributed under the terms of a
+.. permission notice identical to this one.
+..
+.. Since the Linux kernel and libraries are constantly changing, this
+.. manual page may be incorrect or out-of-date. The author(s) assume no
+.. responsibility for errors or omissions, or for damages resulting from
+.. the use of the information contained herein. The author(s) may not
+.. have taken the same level of care in the production of this manual,
+.. which is licensed free of charge, as they might when working
+.. professionally.
+..
+.. Formatted or processed versions of this manual, if unaccompanied by
+.. the source, must acknowledge the copyright and authors of this work.
+.. %%%LICENSE_END
+..
+.. Please do not edit this file. It was generated from the documentation
+.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
+.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
+.. repository (header and footer).
+
+===========
+BPF-HELPERS
+===========
+-------------------------------------------------------------------------------
+list of eBPF helper functions
+-------------------------------------------------------------------------------
+
+:Manual section: 7
+
+DESCRIPTION
+===========
+
+The extended Berkeley Packet Filter (eBPF) subsystem consists in programs
+written in a pseudo-assembly language, then attached to one of the several
+kernel hooks and run in reaction of specific events. This framework differs
+from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
+the ability to call special functions (or "helpers") from within a program.
+These functions are restricted to a white-list of helpers defined in the
+kernel.
+
+These helpers are used by eBPF programs to interact with the system, or with
+the context in which they work. For instance, they can be used to print
+debugging messages, to get the time since the system was booted, to interact
+with eBPF maps, or to manipulate network packets. Since there are several eBPF
+program types, and that they do not run in the same context, each program type
+can only call a subset of those helpers.
+
+Due to eBPF conventions, a helper can not have more than five arguments.
+
+Internally, eBPF programs call directly into the compiled helper functions
+without requiring any foreign-function interface. As a result, calling helpers
+introduces no overhead, thus offering excellent performance.
+
+This document is an attempt to list and document the helpers available to eBPF
+developers. They are sorted by chronological order (the oldest helpers in the
+kernel at the top).
+
+HELPERS
+=======
+'''
+ print(header)
+
+ def print_footer(self):
+ footer = '''
+EXAMPLES
+========
+
+Example usage for most of the eBPF helpers listed in this manual page are
+available within the Linux kernel sources, at the following locations:
+
+* *samples/bpf/*
+* *tools/testing/selftests/bpf/*
+
+LICENSE
+=======
+
+eBPF programs can have an associated license, passed along with the bytecode
+instructions to the kernel when the programs are loaded. The format for that
+string is identical to the one in use for kernel modules (Dual licenses, such
+as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
+programs that are compatible with the GNU Privacy License (GPL).
+
+In order to use such helpers, the eBPF program must be loaded with the correct
+license string passed (via **attr**) to the **bpf**\ () system call, and this
+generally translates into the C source code of the program containing a line
+similar to the following:
+
+::
+
+ char ____license[] __attribute__((section("license"), used)) = "GPL";
+
+IMPLEMENTATION
+==============
+
+This manual page is an effort to document the existing eBPF helper functions.
+But as of this writing, the BPF sub-system is under heavy development. New eBPF
+program or map types are added, along with new helper functions. Some helpers
+are occasionally made available for additional program types. So in spite of
+the efforts of the community, this page might not be up-to-date. If you want to
+check by yourself what helper functions exist in your kernel, or what types of
+programs they can support, here are some files among the kernel tree that you
+may be interested in:
+
+* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list
+ of all helper functions, as well as many other BPF definitions including most
+ of the flags, structs or constants used by the helpers.
+* *net/core/filter.c* contains the definition of most network-related helper
+ functions, and the list of program types from which they can be used.
+* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related
+ helpers.
+* *kernel/bpf/verifier.c* contains the functions used to check that valid types
+ of eBPF maps are used with a given helper function.
+* *kernel/bpf/* directory contains other files in which additional helpers are
+ defined (for cgroups, sockmaps, etc.).
+
+Compatibility between helper functions and program types can generally be found
+in the files where helper functions are defined. Look for the **struct
+bpf_func_proto** objects and for functions returning them: these functions
+contain a list of helpers that a given program type can call. Note that the
+**default:** label of the **switch ... case** used to filter helpers can call
+other functions, themselves allowing access to additional helpers. The
+requirement for GPL license is also in those **struct bpf_func_proto**.
+
+Compatibility between helper functions and map types can be found in the
+**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
+
+Helper functions that invalidate the checks on **data** and **data_end**
+pointers for network processing are listed in function
+**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
+
+SEE ALSO
+========
+
+**bpf**\ (2),
+**cgroups**\ (7),
+**ip**\ (8),
+**perf_event_open**\ (2),
+**sendmsg**\ (2),
+**socket**\ (7),
+**tc-bpf**\ (8)'''
+ print(footer)
+
+ def print_proto(self, helper):
+ """
+ Format function protocol with bold and italics markers. This makes RST
+ file less readable, but gives nice results in the manual page.
+ """
+ proto = helper.proto_break_down()
+
+ print('**%s %s%s(' % (proto['ret_type'],
+ proto['ret_star'].replace('*', '\\*'),
+ proto['name']),
+ end='')
+
+ comma = ''
+ for a in proto['args']:
+ one_arg = '{}{}'.format(comma, a['type'])
+ if a['name']:
+ if a['star']:
+ one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
+ else:
+ one_arg += '** '
+ one_arg += '*{}*\\ **'.format(a['name'])
+ comma = ', '
+ print(one_arg, end='')
+
+ print(')**')
+
+ def print_one(self, helper):
+ self.print_proto(helper)
+
+ if (helper.desc):
+ print('\tDescription')
+ # Do not strip all newline characters: formatted code at the end of
+ # a section must be followed by a blank line.
+ for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
+ print('{}{}'.format('\t\t' if line else '', line))
+
+ if (helper.ret):
+ print('\tReturn')
+ for line in helper.ret.rstrip().split('\n'):
+ print('{}{}'.format('\t\t' if line else '', line))
+
+ print('')
+
+###############################################################################
+
+# If script is launched from scripts/ from kernel tree and can access
+# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse,
+# otherwise the --filename argument will be required from the command line.
+script = os.path.abspath(sys.argv[0])
+linuxRoot = os.path.dirname(os.path.dirname(script))
+bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
+
+argParser = argparse.ArgumentParser(description="""
+Parse eBPF header file and generate documentation for eBPF helper functions.
+The RST-formatted output produced can be turned into a manual page with the
+rst2man utility.
+""")
+if (os.path.isfile(bpfh)):
+ argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
+ default=bpfh)
+else:
+ argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
+args = argParser.parse_args()
+
+# Parse file.
+headerParser = HeaderParser(args.filename)
+headerParser.run()
+
+# Print formatted output to standard output.
+printer = PrinterRST(headerParser.helpers)
+printer.print_all()
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 0e4e923235b6..d004f6382dab 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -26,7 +26,8 @@ MAP COMMANDS
| **bpftool** **cgroup help**
|
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** }
+| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
+| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -63,7 +64,13 @@ DESCRIPTION
**egress** egress path of the inet socket (since 4.10);
**sock_create** opening of an inet socket (since 4.10);
**sock_ops** various socket operations (since 4.12);
- **device** device access (since 4.15).
+ **device** device access (since 4.15);
+ **bind4** call to bind(2) for an inet4 socket (since 4.17);
+ **bind6** call to bind(2) for an inet6 socket (since 4.17);
+ **post_bind4** return from bind(2) for an inet4 socket (since 4.17);
+ **post_bind6** return from bind(2) for an inet6 socket (since 4.17);
+ **connect4** call to connect(2) for an inet4 socket (since 4.17);
+ **connect6** call to connect(2) for an inet6 socket (since 4.17).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 457e868bd32f..5f512b14bff9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -23,10 +23,10 @@ MAP COMMANDS
| **bpftool** **map { show | list }** [*MAP*]
| **bpftool** **map dump** *MAP*
-| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
-| **bpftool** **map lookup** *MAP* **key** *BYTES*
-| **bpftool** **map getnext** *MAP* [**key** *BYTES*]
-| **bpftool** **map delete** *MAP* **key** *BYTES*
+| **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
+| **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES*
+| **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*]
+| **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES*
| **bpftool** **map pin** *MAP* *FILE*
| **bpftool** **map help**
|
@@ -48,20 +48,26 @@ DESCRIPTION
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*.
- **bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
+ **bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
Update map entry for a given *KEY*.
*UPDATE_FLAGS* can be one of: **any** update existing entry
or add if doesn't exit; **exist** update only if entry already
exists; **noexist** update only if entry doesn't exist.
- **bpftool map lookup** *MAP* **key** *BYTES*
+ If the **hex** keyword is provided in front of the bytes
+ sequence, the bytes are parsed as hexadeximal values, even if
+ no "0x" prefix is added. If the keyword is not provided, then
+ the bytes are parsed as decimal values, unless a "0x" prefix
+ (for hexadecimal) or a "0" prefix (for octal) is provided.
+
+ **bpftool map lookup** *MAP* **key** [**hex**] *BYTES*
Lookup **key** in the map.
- **bpftool map getnext** *MAP* [**key** *BYTES*]
+ **bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*]
Get next key. If *key* is not specified, get first key.
- **bpftool map delete** *MAP* **key** *BYTES*
+ **bpftool map delete** *MAP* **key** [**hex**] *BYTES*
Remove entry from the map.
**bpftool map pin** *MAP* *FILE*
@@ -98,7 +104,12 @@ EXAMPLES
10: hash name some_map flags 0x0
key 4B value 8B max_entries 2048 memlock 167936B
-**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04**
+The following three commands are equivalent:
+
+|
+| **# bpftool map update id 10 key hex 20 c4 b7 00 value hex 0f ff ff ab 01 02 03 4c**
+| **# bpftool map update id 10 key 0x20 0xc4 0xb7 0x00 value 0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c**
+| **# bpftool map update id 10 key 32 196 183 0 value 15 255 255 171 1 2 3 76**
**# bpftool map lookup id 10 key 0 1 2 3**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 67ca6c69376c..43d34a5c3ec5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -95,7 +95,7 @@ EXAMPLES
**# bpftool prog show**
::
- 10: xdp name some_prog tag 005a3d2123620c8b
+ 10: xdp name some_prog tag 005a3d2123620c8b gpl
loaded_at Sep 29/20:11 uid 0
xlated 528B jited 370B memlock 4096B map_ids 10
@@ -108,6 +108,7 @@ EXAMPLES
"id": 10,
"type": "xdp",
"tag": "005a3d2123620c8b",
+ "gpl_compatible": true,
"loaded_at": "Sep 29/20:11",
"uid": 0,
"bytes_xlated": 528,
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 490811b45fa7..852d84a98acd 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -147,7 +147,7 @@ _bpftool()
# Deal with simplest keywords
case $prev in
- help|key|opcodes|visual)
+ help|hex|opcodes|visual)
return 0
;;
tag)
@@ -283,7 +283,7 @@ _bpftool()
return 0
;;
key)
- return 0
+ COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
*)
_bpftool_once_attr 'key'
@@ -302,7 +302,7 @@ _bpftool()
return 0
;;
key)
- return 0
+ COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
value)
# We can have bytes, or references to a prog or a
@@ -321,6 +321,8 @@ _bpftool()
return 0
;;
*)
+ COMPREPLY+=( $( compgen -W 'hex' \
+ -- "$cur" ) )
return 0
;;
esac
@@ -372,7 +374,8 @@ _bpftool()
;;
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
- device'
+ device bind4 bind6 post_bind4 post_bind6 connect4 \
+ connect6'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag'
case $prev in
@@ -380,7 +383,8 @@ _bpftool()
_filedir
return 0
;;
- ingress|egress|sock_create|sock_ops|device)
+ ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
+ post_bind4|post_bind6|connect4|connect6)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index cae32a61cb18..1351bd6b5aa7 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -16,8 +16,11 @@
#define HELP_SPEC_ATTACH_FLAGS \
"ATTACH_FLAGS := { multi | override }"
-#define HELP_SPEC_ATTACH_TYPES \
- "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
+#define HELP_SPEC_ATTACH_TYPES \
+ " ATTACH_TYPE := { ingress | egress | sock_create |\n" \
+ " sock_ops | device | bind4 | bind6 |\n" \
+ " post_bind4 | post_bind6 | connect4 |\n" \
+ " connect6 }"
static const char * const attach_type_strings[] = {
[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -25,6 +28,12 @@ static const char * const attach_type_strings[] = {
[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
[BPF_CGROUP_SOCK_OPS] = "sock_ops",
[BPF_CGROUP_DEVICE] = "device",
+ [BPF_CGROUP_INET4_BIND] = "bind4",
+ [BPF_CGROUP_INET6_BIND] = "bind6",
+ [BPF_CGROUP_INET4_CONNECT] = "connect4",
+ [BPF_CGROUP_INET6_CONNECT] = "connect6",
+ [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+ [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
[__MAX_BPF_ATTACH_TYPE] = NULL,
};
@@ -282,7 +291,7 @@ static int do_help(int argc, char **argv)
" %s %s detach CGROUP ATTACH_TYPE PROG\n"
" %s %s help\n"
"\n"
- " " HELP_SPEC_ATTACH_TYPES "\n"
+ HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f509c86faede..a6cdb640a0d7 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -283,11 +283,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
static char **parse_bytes(char **argv, const char *name, unsigned char *val,
unsigned int n)
{
- unsigned int i = 0;
+ unsigned int i = 0, base = 0;
char *endptr;
+ if (is_prefix(*argv, "hex")) {
+ base = 16;
+ argv++;
+ }
+
while (i < n && argv[i]) {
- val[i] = strtoul(argv[i], &endptr, 0);
+ val[i] = strtoul(argv[i], &endptr, base);
if (*endptr) {
p_err("error parsing byte: %s", argv[i]);
return NULL;
@@ -869,10 +874,10 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %s %s { show | list } [MAP]\n"
" %s %s dump MAP\n"
- " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n"
- " %s %s lookup MAP key BYTES\n"
- " %s %s getnext MAP [key BYTES]\n"
- " %s %s delete MAP key BYTES\n"
+ " %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n"
+ " %s %s lookup MAP key [hex] BYTES\n"
+ " %s %s getnext MAP [key [hex] BYTES]\n"
+ " %s %s delete MAP key [hex] BYTES\n"
" %s %s pin MAP FILE\n"
" %s %s help\n"
"\n"
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f7a810897eac..e71a0a11afde 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -68,6 +68,9 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
};
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
@@ -232,6 +235,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
info->tag[0], info->tag[1], info->tag[2], info->tag[3],
info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
+ jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
+
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
if (info->load_time) {
@@ -292,6 +297,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
printf("tag ");
fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+ printf("%s", info->gpl_compatible ? " gpl" : "");
printf("\n");
if (info->load_time) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c5ec89732a8d..da77a9388947 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_cmd {
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
};
enum bpf_map_type {
@@ -279,6 +280,9 @@ union bpf_attr {
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_id; /* BTF type_id of the key */
+ __u32 btf_value_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -363,398 +367,1406 @@ union bpf_attr {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
} __attribute__((aligned(8)));
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- * Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- * Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- * Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- * Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * address *src* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* u64 bpf_ktime_get_ns(void)
- * Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- * Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- * Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- * Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- * store bytes into packet
- * @skb: pointer to skb
- * @offset: offset within packet from skb->mac_header
- * @from: pointer where to copy bytes from
- * @len: number of bytes to store into packet
- * @flags: bit 0 - if true, recompute skb->csum
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- * recompute IP checksum
- * @skb: pointer to skb
- * @offset: offset within packet where IP checksum is located
- * @from: old value of header field
- * @to: new value of header field
- * @flags: bits 0-3 - size of header field
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- * recompute TCP/UDP checksum
- * @skb: pointer to skb
- * @offset: offset within packet where TCP/UDP checksum is located
- * @from: old value of header field
- * @to: new value of header field
- * @flags: bits 0-3 - size of header field
- * bit 4 - is pseudo header
- * other bits - reserved
- * Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- * jump into another BPF program
- * @ctx: context pointer passed to next program
- * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- * @index: 32-bit index inside array that selects specific program to run
- * Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- * redirect to another netdev
- * @skb: pointer to skb
- * @ifindex: ifindex of the net device
- * @flags: bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * Return: 0 on success or negative error
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
- * Return: current->tgid << 32 | current->pid
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
- * Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- * stores current->comm into buf
- * Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- * retrieve a proc's classid
- * @skb: pointer to skb
- * Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- * Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- * Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- * retrieve or populate tunnel metadata
- * @skb: pointer to skb
- * @key: pointer to 'struct bpf_tunnel_key'
- * @size: size of 'struct bpf_tunnel_key'
- * @flags: room for future extensions
- * Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- * read perf event counter value
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- * redirect to another netdev
- * @ifindex: ifindex of the net device
- * @flags:
- * cls_bpf:
- * bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * xdp_bpf:
- * all bits - reserved
- * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- * redirect to endpoint in map
- * @map: pointer to dev map
- * @key: index in map to lookup
- * @flags: --
- * Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- * retrieve a dst's tclassid
- * @skb: pointer to skb
- * Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- * output perf raw sample
- * @ctx: struct pt_regs*
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * @data: data on stack to be output as raw data
- * @size: size of data
- * Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- * walk user or kernel stack and return id
- * @ctx: struct pt_regs*
- * @map: pointer to stack_trace map
- * @flags: bits 0-7 - numer of stack frames to skip
- * bit 8 - collect user stack instead of kernel
- * bit 9 - compare stacks by hash only
- * bit 10 - if two different stacks hash into the same stackid
- * discard old
- * other bits - reserved
- * Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- * calculate csum diff
- * @from: raw from buffer
- * @from_size: length of from buffer
- * @to: raw to buffer
- * @to_size: length of to buffer
- * @seed: optional seed
- * Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- * retrieve tunnel options metadata
- * @skb: pointer to skb
- * @opt: pointer to raw tunnel option data
- * @size: size of @opt
- * Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- * populate tunnel options metadata
- * @skb: pointer to skb
- * @opt: pointer to raw tunnel option data
- * @size: size of @opt
- * Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- * Change protocol of the skb. Currently supported is v4 -> v6,
- * v6 -> v4 transitions. The helper will also resize the skb. eBPF
- * program is expected to fill the new headers via skb_store_bytes
- * and lX_csum_replace.
- * @skb: pointer to skb
- * @proto: new skb->protocol type
- * @flags: reserved
- * Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- * Change packet type of skb.
- * @skb: pointer to skb
- * @type: new skb->pkt_type type
- * Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- * Check cgroup2 membership of skb
- * @skb: pointer to skb
- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- * @index: index of the cgroup in the bpf_map
- * Return:
- * == 0 skb failed the cgroup2 descendant test
- * == 1 skb succeeded the cgroup2 descendant test
- * < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- * Retrieve and possibly recalculate skb->hash.
- * @skb: pointer to skb
- * Return: hash
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read*\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with bpf_perf_event_read_value(), which at the same time
+ * provides more features over the **bpf_perf_event_read**\ ()
+ * interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * indentifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
*
* u64 bpf_get_current_task(void)
- * Returns current task_struct
- * Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- * safely attempt to write to a location
- * @dst: destination address in userspace
- * @src: source address on stack
- * @len: number of bytes to copy
- * Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- * Check cgroup2 membership of current task
- * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- * @index: index of the cgroup in the bpf_map
- * Return:
- * == 0 current failed the cgroup2 descendant test
- * == 1 current succeeded the cgroup2 descendant test
- * < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- * The helper will resize the skb to the given new size, to be used f.e.
- * with control messages.
- * @skb: pointer to skb
- * @len: new skb length
- * @flags: reserved
- * Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- * The helper will pull in non-linear data in case the skb is non-linear
- * and not all of len are part of the linear section. Only needed for
- * read/write with direct packet access.
- * @skb: pointer to skb
- * @len: len to make read/writeable
- * Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- * @skb: pointer to skb
- * @csum: csum to add
- * Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- * Invalidate current skb->hash.
- * @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- * Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- * Grows headroom of skb and adjusts MAC header offset accordingly.
- * Will extends/reallocae as required automatically.
- * May change skb data pointer and will thus invalidate any check
- * performed for direct packet access.
- * @skb: pointer to skb
- * @len: length of header to be pushed in front
- * @flags: Flags (unused for now)
- * Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- * Adjust the xdp_md.data by delta
- * @xdp_md: pointer to xdp_md
- * @delta: An positive/negative integer to be added to xdp_md.data
- * Return: 0 on success or negative on error
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- * Copy a NUL terminated string from unsafe address. In case the string
- * length is smaller than size, the target is not padded with further NUL
- * bytes. In case the string length is larger than size, just count-1
- * bytes are copied and the last byte is set to NUL.
- * @dst: destination address
- * @size: maximum number of bytes to copy, including the trailing NUL
- * @unsafe_ptr: unsafe address
- * Return:
- * > 0 length of the string including the trailing NUL on success
- * < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- * Get the cookie for the socket stored inside sk_buff.
- * @skb: pointer to skb
- * Return: 8 Bytes non-decreasing number on success or 0 if the socket
- * field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- * Get the owner uid of the socket stored inside sk_buff.
- * @skb: pointer to skb
- * Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- * Set full skb->hash.
- * @skb: pointer to skb
- * @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- * Calls setsockopt. Not all opts are available, only those with
- * integer optvals plus TCP_CONGESTION.
- * Supported levels: SOL_SOCKET and IPPROTO_TCP
- * @bpf_socket: pointer to bpf_socket
- * @level: SOL_SOCKET or IPPROTO_TCP
- * @optname: option name
- * @optval: pointer to option value
- * @optlen: length of optval in bytes
- * Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- * Calls getsockopt. Not all opts are available.
- * Supported levels: IPPROTO_TCP
- * @bpf_socket: pointer to bpf_socket
- * @level: IPPROTO_TCP
- * @optname: option name
- * @optval: pointer to option value
- * @optlen: length of optval in bytes
- * Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- * Set callback flags for sock_ops
- * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- * @flags: flags value
- * Return: 0 for no error
- * -EINVAL if there is no full tcp socket
- * bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- * Grow or shrink room in sk_buff.
- * @skb: pointer to skb
- * @len_diff: (signed) amount of room to grow/shrink
- * @mode: operation mode (enum bpf_adj_room_mode)
- * @flags: reserved for future use
- * Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- * Redirect skb to a sock in map using key as a lookup key for the
- * sock in map.
- * @map: pointer to sockmap
- * @key: key to lookup sock in map
- * @flags: reserved for future use
- * Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- * @skops: pointer to bpf_sock_ops
- * @map: pointer to sockmap to update
- * @key: key to insert/update sock in map
- * @flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- * Adjust the xdp_md.data_meta by delta
- * @xdp_md: pointer to xdp_md
- * @delta: An positive/negative integer to be added to xdp_md.data_meta
- * Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- * read perf event counter value and perf event enabled/running time
- * @map: pointer to perf_event_array map
- * @flags: index of event in the map or bitmask flags
- * @buf: buf to fill
- * @buf_size: size of the buf
- * Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- * read perf prog attached perf event counter and enabled/running time
- * @ctx: pointer to ctx
- * @buf: buf to fill
- * @buf_size: size of the buf
- * Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- * @pt_regs: pointer to struct pt_regs
- * @rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- * Redirect msg to a sock in map using key as a lookup key for the
- * sock in map.
- * @map: pointer to sockmap
- * @key: key to lookup sock in map
- * @flags: reserved for future use
- * Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- * Bind socket to address. Only binding to IP is supported, no port can be
- * set in addr.
- * @ctx: pointer to context of type bpf_sock_addr
- * @addr: pointer to struct sockaddr to bind socket to
- * @addr_len: length of sockaddr structure
- * Return: 0 on success or negative error code
+ * Description
+ * Copy a NUL terminated string from an unsafe address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read()** helper here instead
+ * to read the string would require to estimate the length at
+ * compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a unique socket
+ * identifier per namespace.
+ * Return
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There is a single supported mode at this time:
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -821,7 +1833,9 @@ union bpf_attr {
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data), \
- FN(bind),
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -927,6 +1941,19 @@ struct bpf_tunnel_key {
__u32 tunnel_label;
};
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
/* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -1017,6 +2044,7 @@ struct bpf_prog_info {
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
} __attribute__((aligned(8)));
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..bcb56ee47014
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+
+ __u32 parent_label;
+ __u32 parent_name;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 label_off; /* offset of label section */
+ __u32 object_off; /* offset of data object section*/
+ __u32 func_off; /* offset of function section */
+ __u32 type_off; /* offset of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifier */
+#define BTF_MAX_TYPE 0x7fffffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x7fffffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+/* The type id is referring to a parent BTF */
+#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
+#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
+
+/* String is in the ELF string section */
+#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
+#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
+ * bits 31: root
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT and UNION.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
+#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_MAX 11
+#define NR_BTF_KINDS 12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
+#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED 0x1
+#define BTF_INT_CHAR 0x2
+#define BTF_INT_BOOL 0x4
+#define BTF_INT_VARARGS 0x8
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ __u32 offset; /* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 64c679d67109..6070e655042d 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index acbb3f8b3bec..76b36cc16e7f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -73,43 +73,76 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
return syscall(__NR_bpf, cmd, attr, size);
}
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
- int key_size, int value_size, int max_entries,
- __u32 map_flags, int node)
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
- __u32 name_len = name ? strlen(name) : 0;
+ __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
- attr.map_type = map_type;
- attr.key_size = key_size;
- attr.value_size = value_size;
- attr.max_entries = max_entries;
- attr.map_flags = map_flags;
- memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+ attr.map_type = create_attr->map_type;
+ attr.key_size = create_attr->key_size;
+ attr.value_size = create_attr->value_size;
+ attr.max_entries = create_attr->max_entries;
+ attr.map_flags = create_attr->map_flags;
+ memcpy(attr.map_name, create_attr->name,
+ min(name_len, BPF_OBJ_NAME_LEN - 1));
+ attr.numa_node = create_attr->numa_node;
+ attr.btf_fd = create_attr->btf_fd;
+ attr.btf_key_id = create_attr->btf_key_id;
+ attr.btf_value_id = create_attr->btf_value_id;
+
+ return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags, int node)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.name = name;
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
if (node >= 0) {
- attr.map_flags |= BPF_F_NUMA_NODE;
- attr.numa_node = node;
+ map_attr.numa_node = node;
+ map_attr.map_flags |= BPF_F_NUMA_NODE;
}
- return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ return bpf_create_map_xattr(&map_attr);
}
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries, __u32 map_flags)
{
- return bpf_create_map_node(map_type, NULL, key_size, value_size,
- max_entries, map_flags, -1);
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
}
int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags)
{
- return bpf_create_map_node(map_type, name, key_size, value_size,
- max_entries, map_flags, -1);
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.name = name;
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
}
int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
@@ -573,3 +606,28 @@ cleanup:
close(sock);
return ret;
}
+
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+ bool do_log)
+{
+ union bpf_attr attr = {};
+ int fd;
+
+ attr.btf = ptr_to_u64(btf);
+ attr.btf_size = btf_size;
+
+retry:
+ if (do_log && log_buf && log_buf_size) {
+ attr.btf_log_level = 1;
+ attr.btf_log_size = log_buf_size;
+ attr.btf_log_buf = ptr_to_u64(log_buf);
+ }
+
+ fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+ if (fd == -1 && !do_log && log_buf && log_buf_size) {
+ do_log = true;
+ goto retry;
+ }
+
+ return fd;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 39f6a0d64a3b..553b11ad52b3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -24,8 +24,23 @@
#define __BPF_BPF_H
#include <linux/bpf.h>
+#include <stdbool.h>
#include <stddef.h>
+struct bpf_create_map_attr {
+ const char *name;
+ enum bpf_map_type map_type;
+ __u32 map_flags;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 numa_node;
+ __u32 btf_fd;
+ __u32 btf_key_id;
+ __u32 btf_value_id;
+};
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags, int node);
@@ -87,4 +102,6 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+ bool do_log);
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
new file mode 100644
index 000000000000..2bac710e3194
--- /dev/null
+++ b/tools/lib/bpf/btf.c
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "bpf.h"
+
+#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); }
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+#define BTF_MAX_NR_TYPES 65535
+
+static struct btf_type btf_void;
+
+struct btf {
+ union {
+ struct btf_header *hdr;
+ void *data;
+ };
+ struct btf_type **types;
+ const char *strings;
+ void *nohdr_data;
+ uint32_t nr_types;
+ uint32_t types_size;
+ uint32_t data_size;
+ int fd;
+};
+
+static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
+{
+ if (!BTF_STR_TBL_ELF_ID(offset) &&
+ BTF_STR_OFFSET(offset) < btf->hdr->str_len)
+ return &btf->strings[BTF_STR_OFFSET(offset)];
+ else
+ return NULL;
+}
+
+static int btf_add_type(struct btf *btf, struct btf_type *t)
+{
+ if (btf->types_size - btf->nr_types < 2) {
+ struct btf_type **new_types;
+ u32 expand_by, new_size;
+
+ if (btf->types_size == BTF_MAX_NR_TYPES)
+ return -E2BIG;
+
+ expand_by = max(btf->types_size >> 2, 16);
+ new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+
+ new_types = realloc(btf->types, sizeof(*new_types) * new_size);
+ if (!new_types)
+ return -ENOMEM;
+
+ if (btf->nr_types == 0)
+ new_types[0] = &btf_void;
+
+ btf->types = new_types;
+ btf->types_size = new_size;
+ }
+
+ btf->types[++(btf->nr_types)] = t;
+
+ return 0;
+}
+
+static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
+{
+ const struct btf_header *hdr = btf->hdr;
+ u32 meta_left;
+
+ if (btf->data_size < sizeof(struct btf_header)) {
+ elog("BTF header not found\n");
+ return -EINVAL;
+ }
+
+ if (hdr->magic != BTF_MAGIC) {
+ elog("Invalid BTF magic:%x\n", hdr->magic);
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ elog("Unsupported BTF version:%u\n", hdr->version);
+ return -ENOTSUP;
+ }
+
+ if (hdr->flags) {
+ elog("Unsupported BTF flags:%x\n", hdr->flags);
+ return -ENOTSUP;
+ }
+
+ meta_left = btf->data_size - sizeof(*hdr);
+ if (!meta_left) {
+ elog("BTF has no data\n");
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->type_off) {
+ elog("Invalid BTF type section offset:%u\n", hdr->type_off);
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->str_off) {
+ elog("Invalid BTF string section offset:%u\n", hdr->str_off);
+ return -EINVAL;
+ }
+
+ if (hdr->type_off >= hdr->str_off) {
+ elog("BTF type section offset >= string section offset. No type?\n");
+ return -EINVAL;
+ }
+
+ if (hdr->type_off & 0x02) {
+ elog("BTF type section is not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ btf->nohdr_data = btf->hdr + 1;
+
+ return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+ const struct btf_header *hdr = btf->hdr;
+ const char *start = btf->nohdr_data + hdr->str_off;
+ const char *end = start + btf->hdr->str_len;
+
+ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+ start[0] || end[-1]) {
+ elog("Invalid BTF string section\n");
+ return -EINVAL;
+ }
+
+ btf->strings = start;
+
+ return 0;
+}
+
+static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+ struct btf_header *hdr = btf->hdr;
+ void *nohdr_data = btf->nohdr_data;
+ void *next_type = nohdr_data + hdr->type_off;
+ void *end_type = nohdr_data + hdr->str_off;
+
+ while (next_type < end_type) {
+ struct btf_type *t = next_type;
+ uint16_t vlen = BTF_INFO_VLEN(t->info);
+ int err;
+
+ next_type += sizeof(*t);
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ next_type += sizeof(int);
+ break;
+ case BTF_KIND_ARRAY:
+ next_type += sizeof(struct btf_array);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ next_type += vlen * sizeof(struct btf_member);
+ break;
+ case BTF_KIND_ENUM:
+ next_type += vlen * sizeof(struct btf_enum);
+ break;
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_PTR:
+ case BTF_KIND_FWD:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ break;
+ default:
+ elog("Unsupported BTF_KIND:%u\n",
+ BTF_INFO_KIND(t->info));
+ return -EINVAL;
+ }
+
+ err = btf_add_type(btf, t);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct btf_type *btf_type_by_id(const struct btf *btf,
+ uint32_t type_id)
+{
+ if (type_id > btf->nr_types)
+ return NULL;
+
+ return btf->types[type_id];
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+ return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+ return !t || btf_type_is_void(t);
+}
+
+static int64_t btf_type_size(const struct btf_type *t)
+{
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ return t->size;
+ case BTF_KIND_PTR:
+ return sizeof(void *);
+ default:
+ return -EINVAL;
+ }
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
+{
+ const struct btf_array *array;
+ const struct btf_type *t;
+ uint32_t nelems = 1;
+ int64_t size = -1;
+ int i;
+
+ t = btf_type_by_id(btf, type_id);
+ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+ i++) {
+ size = btf_type_size(t);
+ if (size >= 0)
+ break;
+
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ type_id = t->type;
+ break;
+ case BTF_KIND_ARRAY:
+ array = (const struct btf_array *)(t + 1);
+ if (nelems && array->nelems > UINT32_MAX / nelems)
+ return -E2BIG;
+ nelems *= array->nelems;
+ type_id = array->type;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ t = btf_type_by_id(btf, type_id);
+ }
+
+ if (size < 0)
+ return -EINVAL;
+
+ if (nelems && size > UINT32_MAX / nelems)
+ return -E2BIG;
+
+ return nelems * size;
+}
+
+int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+ uint32_t i;
+
+ if (!strcmp(type_name, "void"))
+ return 0;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t = btf->types[i];
+ const char *name = btf_name_by_offset(btf, t->name_off);
+
+ if (name && !strcmp(type_name, name))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+void btf__free(struct btf *btf)
+{
+ if (!btf)
+ return;
+
+ if (btf->fd != -1)
+ close(btf->fd);
+
+ free(btf->data);
+ free(btf->types);
+ free(btf);
+}
+
+struct btf *btf__new(uint8_t *data, uint32_t size,
+ btf_print_fn_t err_log)
+{
+ uint32_t log_buf_size = 0;
+ char *log_buf = NULL;
+ struct btf *btf;
+ int err;
+
+ btf = calloc(1, sizeof(struct btf));
+ if (!btf)
+ return ERR_PTR(-ENOMEM);
+
+ btf->fd = -1;
+
+ if (err_log) {
+ log_buf = malloc(BPF_LOG_BUF_SIZE);
+ if (!log_buf) {
+ err = -ENOMEM;
+ goto done;
+ }
+ *log_buf = 0;
+ log_buf_size = BPF_LOG_BUF_SIZE;
+ }
+
+ btf->data = malloc(size);
+ if (!btf->data) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ memcpy(btf->data, data, size);
+ btf->data_size = size;
+
+ btf->fd = bpf_load_btf(btf->data, btf->data_size,
+ log_buf, log_buf_size, false);
+
+ if (btf->fd == -1) {
+ err = -errno;
+ elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+ if (log_buf && *log_buf)
+ elog("%s\n", log_buf);
+ goto done;
+ }
+
+ err = btf_parse_hdr(btf, err_log);
+ if (err)
+ goto done;
+
+ err = btf_parse_str_sec(btf, err_log);
+ if (err)
+ goto done;
+
+ err = btf_parse_type_sec(btf, err_log);
+
+done:
+ free(log_buf);
+
+ if (err) {
+ btf__free(btf);
+ return ERR_PTR(err);
+ }
+
+ return btf;
+}
+
+int btf__fd(const struct btf *btf)
+{
+ return btf->fd;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
new file mode 100644
index 000000000000..74bb344035bb
--- /dev/null
+++ b/tools/lib/bpf/btf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __BPF_BTF_H
+#define __BPF_BTF_H
+
+#include <stdint.h>
+
+#define BTF_ELF_SEC ".BTF"
+
+struct btf;
+
+typedef int (*btf_print_fn_t)(const char *, ...)
+ __attribute__((format(printf, 1, 2)));
+
+void btf__free(struct btf *btf);
+struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log);
+int32_t btf__find_by_name(const struct btf *btf, const char *type_name);
+int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id);
+int btf__fd(const struct btf *btf);
+
+#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5922443063f0..7bcdca13083a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -45,6 +45,7 @@
#include "libbpf.h"
#include "bpf.h"
+#include "btf.h"
#ifndef EM_BPF
#define EM_BPF 247
@@ -212,6 +213,8 @@ struct bpf_map {
char *name;
size_t offset;
struct bpf_map_def def;
+ uint32_t btf_key_id;
+ uint32_t btf_value_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
};
@@ -256,6 +259,8 @@ struct bpf_object {
*/
struct list_head list;
+ struct btf *btf;
+
void *priv;
bpf_object_clear_priv_t clear_priv;
@@ -819,7 +824,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size);
else if (strcmp(name, "maps") == 0)
obj->efile.maps_shndx = idx;
- else if (sh.sh_type == SHT_SYMTAB) {
+ else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ obj->btf = btf__new(data->d_buf, data->d_size,
+ __pr_debug);
+ if (IS_ERR(obj->btf)) {
+ pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+ BTF_ELF_SEC, PTR_ERR(obj->btf));
+ obj->btf = NULL;
+ }
+ } else if (sh.sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
pr_warning("bpf: multiple SYMTAB in %s\n",
obj->path);
@@ -996,33 +1009,126 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
return 0;
}
+static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+{
+ struct bpf_map_def *def = &map->def;
+ const size_t max_name = 256;
+ int64_t key_size, value_size;
+ int32_t key_id, value_id;
+ char name[max_name];
+
+ /* Find key type by name from BTF */
+ if (snprintf(name, max_name, "%s_key", map->name) == max_name) {
+ pr_warning("map:%s length of BTF key_type:%s_key is too long\n",
+ map->name, map->name);
+ return -EINVAL;
+ }
+
+ key_id = btf__find_by_name(btf, name);
+ if (key_id < 0) {
+ pr_debug("map:%s key_type:%s cannot be found in BTF\n",
+ map->name, name);
+ return key_id;
+ }
+
+ key_size = btf__resolve_size(btf, key_id);
+ if (key_size < 0) {
+ pr_warning("map:%s key_type:%s cannot get the BTF type_size\n",
+ map->name, name);
+ return key_size;
+ }
+
+ if (def->key_size != key_size) {
+ pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
+ map->name, name, key_size, def->key_size);
+ return -EINVAL;
+ }
+
+ /* Find value type from BTF */
+ if (snprintf(name, max_name, "%s_value", map->name) == max_name) {
+ pr_warning("map:%s length of BTF value_type:%s_value is too long\n",
+ map->name, map->name);
+ return -EINVAL;
+ }
+
+ value_id = btf__find_by_name(btf, name);
+ if (value_id < 0) {
+ pr_debug("map:%s value_type:%s cannot be found in BTF\n",
+ map->name, name);
+ return value_id;
+ }
+
+ value_size = btf__resolve_size(btf, value_id);
+ if (value_size < 0) {
+ pr_warning("map:%s value_type:%s cannot get the BTF type_size\n",
+ map->name, name);
+ return value_size;
+ }
+
+ if (def->value_size != value_size) {
+ pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
+ map->name, name, value_size, def->value_size);
+ return -EINVAL;
+ }
+
+ map->btf_key_id = key_id;
+ map->btf_value_id = value_id;
+
+ return 0;
+}
+
static int
bpf_object__create_maps(struct bpf_object *obj)
{
+ struct bpf_create_map_attr create_attr = {};
unsigned int i;
+ int err;
for (i = 0; i < obj->nr_maps; i++) {
- struct bpf_map_def *def = &obj->maps[i].def;
- int *pfd = &obj->maps[i].fd;
-
- *pfd = bpf_create_map_name(def->type,
- obj->maps[i].name,
- def->key_size,
- def->value_size,
- def->max_entries,
- def->map_flags);
+ struct bpf_map *map = &obj->maps[i];
+ struct bpf_map_def *def = &map->def;
+ int *pfd = &map->fd;
+
+ create_attr.name = map->name;
+ create_attr.map_type = def->type;
+ create_attr.map_flags = def->map_flags;
+ create_attr.key_size = def->key_size;
+ create_attr.value_size = def->value_size;
+ create_attr.max_entries = def->max_entries;
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_id = 0;
+ create_attr.btf_value_id = 0;
+
+ if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+ create_attr.btf_fd = btf__fd(obj->btf);
+ create_attr.btf_key_id = map->btf_key_id;
+ create_attr.btf_value_id = map->btf_value_id;
+ }
+
+ *pfd = bpf_create_map_xattr(&create_attr);
+ if (*pfd < 0 && create_attr.btf_key_id) {
+ pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+ map->name, strerror(errno), errno);
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_id = 0;
+ create_attr.btf_value_id = 0;
+ map->btf_key_id = 0;
+ map->btf_value_id = 0;
+ *pfd = bpf_create_map_xattr(&create_attr);
+ }
+
if (*pfd < 0) {
size_t j;
- int err = *pfd;
+ err = *pfd;
pr_warning("failed to create map (name: '%s'): %s\n",
- obj->maps[i].name,
+ map->name,
strerror(errno));
for (j = 0; j < i; j++)
zclose(obj->maps[j].fd);
return err;
}
- pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd);
+ pr_debug("create map %s: fd=%d\n", map->name, *pfd);
}
return 0;
@@ -1641,6 +1747,7 @@ void bpf_object__close(struct bpf_object *obj)
bpf_object__elf_finish(obj);
bpf_object__unload(obj);
+ btf__free(obj->btf);
for (i = 0; i < obj->nr_maps; i++) {
zfree(&obj->maps[i].name);
@@ -1692,6 +1799,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
return obj ? obj->kern_version : 0;
}
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+ return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv)
{
@@ -1845,11 +1957,12 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
- enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type)
{
prog->expected_attach_type = type;
}
@@ -1859,6 +1972,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
+#define BPF_S_PROG_SEC(string, ptype) \
+ BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
+
#define BPF_SA_PROG_SEC(string, ptype) \
BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
@@ -1874,6 +1990,7 @@ static const struct {
BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
@@ -1889,10 +2006,13 @@ static const struct {
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+ BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
+ BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
};
#undef BPF_PROG_SEC
#undef BPF_PROG_SEC_FULL
+#undef BPF_S_PROG_SEC
#undef BPF_SA_PROG_SEC
static int bpf_program__identify_section(struct bpf_program *prog)
@@ -1929,6 +2049,16 @@ const char *bpf_map__name(struct bpf_map *map)
return map ? map->name : NULL;
}
+uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
+{
+ return map ? map->btf_key_id : 0;
+}
+
+uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
+{
+ return map ? map->btf_value_id : 0;
+}
+
int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a3a62a583f27..197f9ce2248c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -78,6 +78,7 @@ int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj);
const char *bpf_object__name(struct bpf_object *obj);
unsigned int bpf_object__kversion(struct bpf_object *obj);
+int bpf_object__btf_fd(const struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
@@ -185,15 +186,19 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n);
*/
int bpf_program__set_socket_filter(struct bpf_program *prog);
int bpf_program__set_tracepoint(struct bpf_program *prog);
+int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
int bpf_program__set_kprobe(struct bpf_program *prog);
int bpf_program__set_sched_cls(struct bpf_program *prog);
int bpf_program__set_sched_act(struct bpf_program *prog);
int bpf_program__set_xdp(struct bpf_program *prog);
int bpf_program__set_perf_event(struct bpf_program *prog);
void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type);
bool bpf_program__is_socket_filter(struct bpf_program *prog);
bool bpf_program__is_tracepoint(struct bpf_program *prog);
+bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
bool bpf_program__is_kprobe(struct bpf_program *prog);
bool bpf_program__is_sched_cls(struct bpf_program *prog);
bool bpf_program__is_sched_act(struct bpf_program *prog);
@@ -239,6 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
int bpf_map__fd(struct bpf_map *map);
const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
const char *bpf_map__name(struct bpf_map *map);
+uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 5e1ab2f0eb79..3e3b3ced3f7c 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -15,3 +15,4 @@ test_libbpf_open
test_sock
test_sock_addr
urandom_read
+test_btf
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0a315ddabbf4..b64a7a39cbc8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -24,14 +24,15 @@ urandom_read: urandom_read.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_sock_addr
+ test_sock test_btf test_sockmap
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
- sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
+ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
+ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -39,10 +40,11 @@ TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
test_xdp_meta.sh \
test_offload.py \
- test_sock_addr.sh
+ test_sock_addr.sh \
+ test_tunnel.sh
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
include ../lib.mk
@@ -55,6 +57,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
.PHONY: force
@@ -66,6 +69,8 @@ $(BPFOBJ): force
CLANG ?= clang
LLC ?= llc
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
@@ -83,9 +88,26 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help |& grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help |& grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version |& grep LLVM)
+
+ifneq ($(BTF_LLC_PROBE),)
+ifneq ($(BTF_PAHOLE_PROBE),)
+ifneq ($(BTF_OBJCOPY_PROBE),)
+ CLANG_FLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+endif
+endif
+
$(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
- $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d8223d99f96d..69d7b918e66a 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -96,6 +96,11 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_tail;
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+ int size, int flags) =
+ (void *) BPF_FUNC_skb_get_xfrm_state;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -129,6 +134,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+ (void *) BPF_FUNC_csum_diff;
static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_under_cgroup;
static int (*bpf_skb_change_head)(void *, int len, int flags) =
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000000..4cd5e860c903
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ int offset = 0;
+
+ if (data_end - data == 54)
+ offset = 256;
+ else
+ offset = 20;
+ if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+ return XDP_DROP;
+ return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 000000000000..7b39b1f712a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,1669 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "bpf_rlimit.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+ va_list args;
+ int err;
+
+ va_start(args, format);
+ err = vfprintf(stderr, format, args);
+ va_end(args);
+ return err;
+}
+
+#define BTF_INFO_ENC(kind, root, vlen) \
+ ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type) \
+ (name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems) \
+ (type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+ BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset) \
+ (name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+
+#define BTF_TYPEDEF_ENC(name, type) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(name, type) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_END_RAW 0xdeadbeef
+#define NAME_TBD 0xdeadb33f
+
+#define MAX_NR_RAW_TYPES 1024
+#define BTF_LOG_BUF_SIZE 65535
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+static struct args {
+ unsigned int raw_test_num;
+ unsigned int file_test_num;
+ unsigned int get_info_test_num;
+ bool raw_test;
+ bool file_test;
+ bool get_info_test;
+ bool pprint_test;
+ bool always_log;
+} args;
+
+static char btf_log_buf[BTF_LOG_BUF_SIZE];
+
+static struct btf_header hdr_tmpl = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+};
+
+struct btf_raw_test {
+ const char *descr;
+ const char *str_sec;
+ const char *map_name;
+ __u32 raw_types[MAX_NR_RAW_TYPES];
+ __u32 str_sec_size;
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 key_id;
+ __u32 value_id;
+ __u32 max_entries;
+ bool btf_load_err;
+ bool map_create_err;
+ int type_off_delta;
+ int str_off_delta;
+ int str_len_delta;
+};
+
+static struct btf_raw_test raw_tests[] = {
+/* enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * int m;
+ * unsigned long long n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * int q[4][8];
+ * enum E r;
+ * };
+ */
+{
+ .descr = "struct test #1",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
+ BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */
+ /* } */
+ /* int[4][8] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test1_map",
+ .key_size = sizeof(int),
+ .value_size = 180,
+ .key_id = 1,
+ .value_id = 5,
+ .max_entries = 4,
+},
+
+/* typedef struct b Struct_B;
+ *
+ * struct A {
+ * int m;
+ * struct b n[4];
+ * const Struct_B o[4];
+ * };
+ *
+ * struct B {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "struct test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct b [4] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4),
+
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */
+ BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
+ /* } */
+
+ /* struct B { */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+ /* } */
+
+ /* const int */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+ /* typedef struct b Struct_B */ /* [6] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
+ /* const Struct_B */ /* [7] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
+ /* const Struct_B [4] */ /* [8] */
+ BTF_TYPE_ARRAY_ENC(7, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test2_map",
+ .key_size = sizeof(int),
+ .value_size = 68,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+},
+
+/* Test member exceeds the size of struct.
+ *
+ * struct A {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "size check test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check1_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * int n[2];
+ * };
+ */
+{
+ .descr = "size check test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* int[2] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check2_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * void *n;
+ * };
+ */
+{
+ .descr = "size check test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* void* */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check3_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * int m;
+ * enum E n;
+ * };
+ */
+{
+ .descr = "size check test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* enum E { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ /* } */
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0E\0E0\0E1\0A\0m\0n",
+ .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check4_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ * const_void_ptr m;
+ * };
+ */
+{
+ .descr = "void test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ /* typedef const void * const_void_ptr */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ /* struct A { */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const_void_ptr m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0const_void_ptr\0A\0m",
+ .str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test1_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_id = 1,
+ .value_id = 4,
+ .max_entries = 4,
+},
+
+/* struct A {
+ * const void m;
+ * };
+ */
+{
+ .descr = "void test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
+ /* const void m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test2_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* typedef const void * const_void_ptr;
+ * const_void_ptr[4]
+ */
+{
+ .descr = "void test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ /* typedef const void * const_void_ptr */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ /* const_void_ptr[4] */ /* [4] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0const_void_ptr",
+ .str_sec_size = sizeof("\0const_void_ptr"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test3_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *) * 4,
+ .key_id = 1,
+ .value_id = 4,
+ .max_entries = 4,
+},
+
+/* const void[4] */
+{
+ .descr = "void test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void[4] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test4_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *) * 4,
+ .key_id = 1,
+ .value_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* Array_A <------------------+
+ * elem_type == Array_B |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array A --+
+ */
+{
+ .descr = "loop test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* Array_B */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test1_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* typedef is _before_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A <------------------+
+ * elem_type == int_array |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #2",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* typedef Array_B int_array */
+ BTF_TYPEDEF_ENC(1, 4), /* [2] */
+ /* Array_A */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
+ /* Array_B */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
+
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int_array\0",
+ .str_sec_size = sizeof("\0int_array"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test2_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* Array_A <------------------+
+ * elem_type == Array_B |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* Array_B */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test3_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* typedef is _between_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A <------------------+
+ * elem_type == int_array |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* typedef Array_B int_array */ /* [3] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4),
+ /* Array_B */ /* [4] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int_array\0",
+ .str_sec_size = sizeof("\0int_array"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test4_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(sizeof(int) * 8),
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* typedef struct B Struct_B
+ *
+ * struct A {
+ * int x;
+ * Struct_B y;
+ * };
+ *
+ * struct B {
+ * int x;
+ * struct A y;
+ * };
+ */
+{
+ .descr = "loop test #5",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct A */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */
+ /* typedef struct B Struct_B */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */
+ /* struct B */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
+ .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test5_map",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+/* struct A {
+ * int x;
+ * struct A array_a[4];
+ * };
+ */
+{
+ .descr = "loop test #6",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
+ /* struct A */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0x\0y",
+ .str_sec_size = sizeof("\0A\0x\0y"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test6_map",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+{
+ .descr = "loop test #7",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+ /* CONST type_id=3 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* PTR type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test7_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+{
+ .descr = "loop test #8",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *m; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+ /* struct B { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *n; */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 0),
+ /* CONST type_id=5 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
+ /* PTR type_id=6 */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
+ /* CONST type_id=7 */ /* [6] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
+ /* PTR type_id=4 */ /* [7] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0B\0n",
+ .str_sec_size = sizeof("\0A\0m\0B\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test8_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_id = 1,
+ .value_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+},
+
+{
+ .descr = "type_off == str_off",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_id = 1,
+ .value_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
+},
+
+{
+ .descr = "Unaligned type_off",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_id = 1,
+ .value_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .type_off_delta = 1,
+},
+
+{
+ .descr = "str_off beyonds btf size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_id = 1,
+ .value_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_off_delta = sizeof("\0int") + 1,
+},
+
+{
+ .descr = "str_len beyonds btf size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_id = 1,
+ .value_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = 1,
+},
+
+{
+ .descr = "String section does not end with null",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_id = 1,
+ .value_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = -1,
+},
+
+{
+ .descr = "Empty string section",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_id = 1,
+ .value_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = 0 - (int)sizeof("\0int"),
+},
+
+}; /* struct btf_raw_test raw_tests[] */
+
+static const char *get_next_str(const char *start, const char *end)
+{
+ return start < end - 1 ? start + 1 : NULL;
+}
+
+static int get_type_sec_size(const __u32 *raw_types)
+{
+ int i;
+
+ for (i = MAX_NR_RAW_TYPES - 1;
+ i >= 0 && raw_types[i] != BTF_END_RAW;
+ i--)
+ ;
+
+ return i < 0 ? i : i * sizeof(raw_types[0]);
+}
+
+static void *btf_raw_create(const struct btf_header *hdr,
+ const __u32 *raw_types,
+ const char *str,
+ unsigned int str_sec_size,
+ unsigned int *btf_size)
+{
+ const char *next_str = str, *end_str = str + str_sec_size;
+ unsigned int size_needed, offset;
+ struct btf_header *ret_hdr;
+ int i, type_sec_size;
+ uint32_t *ret_types;
+ void *raw_btf;
+
+ type_sec_size = get_type_sec_size(raw_types);
+ if (type_sec_size < 0) {
+ fprintf(stderr, "Cannot get nr_raw_types\n");
+ return NULL;
+ }
+
+ size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
+ raw_btf = malloc(size_needed);
+ if (!raw_btf) {
+ fprintf(stderr, "Cannot allocate memory for raw_btf\n");
+ return NULL;
+ }
+
+ /* Copy header */
+ memcpy(raw_btf, hdr, sizeof(*hdr));
+ offset = sizeof(*hdr);
+
+ /* Copy type section */
+ ret_types = raw_btf + offset;
+ for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
+ if (raw_types[i] == NAME_TBD) {
+ next_str = get_next_str(next_str, end_str);
+ if (!next_str) {
+ fprintf(stderr, "Error in getting next_str\n");
+ free(raw_btf);
+ return NULL;
+ }
+ ret_types[i] = next_str - str;
+ next_str += strlen(next_str);
+ } else {
+ ret_types[i] = raw_types[i];
+ }
+ }
+ offset += type_sec_size;
+
+ /* Copy string section */
+ memcpy(raw_btf + offset, str, str_sec_size);
+
+ ret_hdr = (struct btf_header *)raw_btf;
+ ret_hdr->str_off = type_sec_size;
+ ret_hdr->str_len = str_sec_size;
+
+ *btf_size = size_needed;
+
+ return raw_btf;
+}
+
+static int do_test_raw(unsigned int test_num)
+{
+ struct btf_raw_test *test = &raw_tests[test_num - 1];
+ struct bpf_create_map_attr create_attr = {};
+ int map_fd = -1, btf_fd = -1;
+ unsigned int raw_btf_size;
+ struct btf_header *hdr;
+ void *raw_btf;
+ int err;
+
+ fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ hdr = raw_btf;
+
+ hdr->type_off = (int)hdr->type_off + test->type_off_delta;
+ hdr->str_off = (int)hdr->str_off + test->str_off_delta;
+ hdr->str_len = (int)hdr->str_len + test->str_len_delta;
+
+ *btf_log_buf = '\0';
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ free(raw_btf);
+
+ err = ((btf_fd == -1) != test->btf_load_err);
+ if (err)
+ fprintf(stderr, "btf_load_err:%d btf_fd:%d\n",
+ test->btf_load_err, btf_fd);
+
+ if (err || btf_fd == -1)
+ goto done;
+
+ create_attr.name = test->map_name;
+ create_attr.map_type = test->map_type;
+ create_attr.key_size = test->key_size;
+ create_attr.value_size = test->value_size;
+ create_attr.max_entries = test->max_entries;
+ create_attr.btf_fd = btf_fd;
+ create_attr.btf_key_id = test->key_id;
+ create_attr.btf_value_id = test->value_id;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+
+ err = ((map_fd == -1) != test->map_create_err);
+ if (err)
+ fprintf(stderr, "map_create_err:%d map_fd:%d\n",
+ test->map_create_err, map_fd);
+
+done:
+ if (!err)
+ fprintf(stderr, "OK\n");
+
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "%s\n", btf_log_buf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+ if (map_fd != -1)
+ close(map_fd);
+
+ return err;
+}
+
+static int test_raw(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.raw_test_num)
+ return do_test_raw(args.raw_test_num);
+
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ err |= do_test_raw(i);
+
+ return err;
+}
+
+struct btf_get_info_test {
+ const char *descr;
+ const char *str_sec;
+ __u32 raw_types[MAX_NR_RAW_TYPES];
+ __u32 str_sec_size;
+ int info_size_delta;
+};
+
+const struct btf_get_info_test get_info_tests[] = {
+{
+ .descr = "== raw_btf_size+1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .info_size_delta = 1,
+},
+{
+ .descr = "== raw_btf_size-3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .info_size_delta = -3,
+},
+};
+
+static int do_test_get_info(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ unsigned int raw_btf_size, user_btf_size, expected_nbytes;
+ uint8_t *raw_btf = NULL, *user_btf = NULL;
+ int btf_fd = -1, err;
+
+ fprintf(stderr, "BTF GET_INFO_BY_ID test[%u] (%s): ",
+ test_num, test->descr);
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ user_btf = malloc(raw_btf_size);
+ if (!user_btf) {
+ fprintf(stderr, "Cannot allocate memory for user_btf\n");
+ err = -1;
+ goto done;
+ }
+
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (btf_fd == -1) {
+ fprintf(stderr, "bpf_load_btf:%s(%d)\n",
+ strerror(errno), errno);
+ err = -1;
+ goto done;
+ }
+
+ user_btf_size = (int)raw_btf_size + test->info_size_delta;
+ expected_nbytes = min(raw_btf_size, user_btf_size);
+ if (raw_btf_size > expected_nbytes)
+ memset(user_btf + expected_nbytes, 0xff,
+ raw_btf_size - expected_nbytes);
+
+ err = bpf_obj_get_info_by_fd(btf_fd, user_btf, &user_btf_size);
+ if (err || user_btf_size != raw_btf_size ||
+ memcmp(raw_btf, user_btf, expected_nbytes)) {
+ fprintf(stderr,
+ "err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d\n",
+ err, errno,
+ raw_btf_size, user_btf_size, expected_nbytes,
+ memcmp(raw_btf, user_btf, expected_nbytes));
+ err = -1;
+ goto done;
+ }
+
+ while (expected_nbytes < raw_btf_size) {
+ fprintf(stderr, "%u...", expected_nbytes);
+ if (user_btf[expected_nbytes++] != 0xff) {
+ fprintf(stderr, "!= 0xff\n");
+ err = -1;
+ goto done;
+ }
+ }
+
+ fprintf(stderr, "OK\n");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "%s\n", btf_log_buf);
+
+ free(raw_btf);
+ free(user_btf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+
+ return err;
+}
+
+static int test_get_info(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.get_info_test_num)
+ return do_test_get_info(args.get_info_test_num);
+
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ err |= do_test_get_info(i);
+
+ return err;
+}
+
+struct btf_file_test {
+ const char *file;
+ bool btf_kv_notfound;
+};
+
+static struct btf_file_test file_tests[] = {
+{
+ .file = "test_btf_haskv.o",
+},
+{
+ .file = "test_btf_nokv.o",
+ .btf_kv_notfound = true,
+},
+};
+
+static int file_has_btf_elf(const char *fn)
+{
+ Elf_Scn *scn = NULL;
+ GElf_Ehdr ehdr;
+ int elf_fd;
+ Elf *elf;
+ int ret;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ fprintf(stderr, "Failed to init libelf\n");
+ return -1;
+ }
+
+ elf_fd = open(fn, O_RDONLY);
+ if (elf_fd == -1) {
+ fprintf(stderr, "Cannot open file %s: %s(%d)\n",
+ fn, strerror(errno), errno);
+ return -1;
+ }
+
+ elf = elf_begin(elf_fd, ELF_C_READ, NULL);
+ if (!elf) {
+ fprintf(stderr, "Failed to read ELF from %s. %s\n", fn,
+ elf_errmsg(elf_errno()));
+ ret = -1;
+ goto done;
+ }
+
+ if (!gelf_getehdr(elf, &ehdr)) {
+ fprintf(stderr, "Failed to get EHDR from %s\n", fn);
+ ret = -1;
+ goto done;
+ }
+
+ while ((scn = elf_nextscn(elf, scn))) {
+ const char *sh_name;
+ GElf_Shdr sh;
+
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ fprintf(stderr,
+ "Failed to get section header from %s\n", fn);
+ ret = -1;
+ goto done;
+ }
+
+ sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+ if (!strcmp(sh_name, BTF_ELF_SEC)) {
+ ret = 1;
+ goto done;
+ }
+ }
+
+ ret = 0;
+
+done:
+ close(elf_fd);
+ elf_end(elf);
+ return ret;
+}
+
+static int do_test_file(unsigned int test_num)
+{
+ const struct btf_file_test *test = &file_tests[test_num - 1];
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+
+ fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
+ test->file);
+
+ err = file_has_btf_elf(test->file);
+ if (err == -1)
+ return err;
+
+ if (err == 0) {
+ fprintf(stderr, "SKIP. No ELF %s found\n", BTF_ELF_SEC);
+ return 0;
+ }
+
+ obj = bpf_object__open(test->file);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = bpf_object__btf_fd(obj);
+ if (err == -1) {
+ fprintf(stderr, "bpf_object__btf_fd: -1\n");
+ goto done;
+ }
+
+ prog = bpf_program__next(NULL, obj);
+ if (!prog) {
+ fprintf(stderr, "Cannot find bpf_prog\n");
+ err = -1;
+ goto done;
+ }
+
+ bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+ err = bpf_object__load(obj);
+ if (err < 0) {
+ fprintf(stderr, "bpf_object__load: %d\n", err);
+ goto done;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "btf_map");
+ if (!map) {
+ fprintf(stderr, "btf_map not found\n");
+ err = -1;
+ goto done;
+ }
+
+ err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
+ != test->btf_kv_notfound;
+ if (err) {
+ fprintf(stderr,
+ "btf_kv_notfound:%u btf_key_id:%u btf_value_id:%u\n",
+ test->btf_kv_notfound,
+ bpf_map__btf_key_id(map),
+ bpf_map__btf_value_id(map));
+ goto done;
+ }
+
+ fprintf(stderr, "OK\n");
+
+done:
+ bpf_object__close(obj);
+ return err;
+}
+
+static int test_file(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.file_test_num)
+ return do_test_file(args.file_test_num);
+
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ err |= do_test_file(i);
+
+ return err;
+}
+
+const char *pprint_enum_str[] = {
+ "ENUM_ZERO",
+ "ENUM_ONE",
+ "ENUM_TWO",
+ "ENUM_THREE",
+};
+
+struct pprint_mapv {
+ uint32_t ui32;
+ uint16_t ui16;
+ /* 2 bytes hole */
+ int32_t si32;
+ uint32_t unused_bits2a:2,
+ bits28:28,
+ unused_bits2b:2;
+ union {
+ uint64_t ui64;
+ uint8_t ui8a[8];
+ };
+ enum {
+ ENUM_ZERO,
+ ENUM_ONE,
+ ENUM_TWO,
+ ENUM_THREE,
+ } aenum;
+};
+
+static struct btf_raw_test pprint_test = {
+ .descr = "BTF pretty print test #1",
+ .raw_types = {
+ /* unsighed char */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
+ /* unsigned short */ /* [2] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
+ /* unsigned int */ /* [3] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
+ /* int */ /* [4] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned long long */ /* [5] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
+ /* 2 bits */ /* [6] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
+ /* 28 bits */ /* [7] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
+ /* uint8_t[8] */ /* [8] */
+ BTF_TYPE_ARRAY_ENC(9, 3, 8),
+ /* typedef unsigned char uint8_t */ /* [9] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1),
+ /* typedef unsigned short uint16_t */ /* [10] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 2),
+ /* typedef unsigned int uint32_t */ /* [11] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3),
+ /* typedef int int32_t */ /* [12] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4),
+ /* typedef unsigned long long uint64_t *//* [13] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 5),
+ /* union (anon) */ /* [14] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
+ BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */
+ /* enum (anon) */ /* [15] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ BTF_ENUM_ENC(NAME_TBD, 2),
+ BTF_ENUM_ENC(NAME_TBD, 3),
+ /* struct pprint_mapv */ /* [16] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28),
+ BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */
+ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */
+ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */
+ BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */
+ BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */
+ BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
+ .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "pprint_test",
+ .key_size = sizeof(unsigned int),
+ .value_size = sizeof(struct pprint_mapv),
+ .key_id = 3, /* unsigned int */
+ .value_id = 16, /* struct pprint_mapv */
+ .max_entries = 128 * 1024,
+};
+
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+{
+ v->ui32 = i;
+ v->si32 = -i;
+ v->unused_bits2a = 3;
+ v->bits28 = i;
+ v->unused_bits2b = 3;
+ v->ui64 = i;
+ v->aenum = i & 0x03;
+}
+
+static int test_pprint(void)
+{
+ const struct btf_raw_test *test = &pprint_test;
+ struct bpf_create_map_attr create_attr = {};
+ int map_fd = -1, btf_fd = -1;
+ struct pprint_mapv mapv = {};
+ unsigned int raw_btf_size;
+ char expected_line[255];
+ FILE *pin_file = NULL;
+ char pin_path[255];
+ size_t line_len = 0;
+ char *line = NULL;
+ unsigned int key;
+ uint8_t *raw_btf;
+ ssize_t nread;
+ int err;
+
+ fprintf(stderr, "%s......", test->descr);
+ raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+ test->str_sec, test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ free(raw_btf);
+
+ if (btf_fd == -1) {
+ err = -1;
+ fprintf(stderr, "bpf_load_btf: %s(%d)\n",
+ strerror(errno), errno);
+ goto done;
+ }
+
+ create_attr.name = test->map_name;
+ create_attr.map_type = test->map_type;
+ create_attr.key_size = test->key_size;
+ create_attr.value_size = test->value_size;
+ create_attr.max_entries = test->max_entries;
+ create_attr.btf_fd = btf_fd;
+ create_attr.btf_key_id = test->key_id;
+ create_attr.btf_value_id = test->value_id;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+ if (map_fd == -1) {
+ err = -1;
+ fprintf(stderr, "bpf_creat_map_btf: %s(%d)\n",
+ strerror(errno), errno);
+ goto done;
+ }
+
+ if (snprintf(pin_path, sizeof(pin_path), "%s/%s",
+ "/sys/fs/bpf", test->map_name) == sizeof(pin_path)) {
+ err = -1;
+ fprintf(stderr, "pin_path is too long\n");
+ goto done;
+ }
+
+ err = bpf_obj_pin(map_fd, pin_path);
+ if (err) {
+ fprintf(stderr, "Cannot pin to %s. %s(%d).\n", pin_path,
+ strerror(errno), errno);
+ goto done;
+ }
+
+ for (key = 0; key < test->max_entries; key++) {
+ set_pprint_mapv(&mapv, key);
+ bpf_map_update_elem(map_fd, &key, &mapv, 0);
+ }
+
+ pin_file = fopen(pin_path, "r");
+ if (!pin_file) {
+ err = -1;
+ fprintf(stderr, "fopen(%s): %s(%d)\n", pin_path,
+ strerror(errno), errno);
+ goto done;
+ }
+
+ /* Skip lines start with '#' */
+ while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
+ *line == '#')
+ ;
+
+ if (nread <= 0) {
+ err = -1;
+ fprintf(stderr, "Unexpected EOF\n");
+ goto done;
+ }
+
+ key = 0;
+ do {
+ ssize_t nexpected_line;
+
+ set_pprint_mapv(&mapv, key);
+ nexpected_line = snprintf(expected_line, sizeof(expected_line),
+ "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+ key,
+ mapv.ui32, mapv.si32,
+ mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
+ mapv.ui64,
+ mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
+ mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
+ pprint_enum_str[mapv.aenum]);
+
+ if (nexpected_line == sizeof(expected_line)) {
+ err = -1;
+ fprintf(stderr, "expected_line is too long\n");
+ goto done;
+ }
+
+ if (strcmp(expected_line, line)) {
+ err = -1;
+ fprintf(stderr, "unexpected pprint output\n");
+ fprintf(stderr, "expected: %s", expected_line);
+ fprintf(stderr, " read: %s", line);
+ goto done;
+ }
+
+ nread = getline(&line, &line_len, pin_file);
+ } while (++key < test->max_entries && nread > 0);
+
+ if (key < test->max_entries) {
+ err = -1;
+ fprintf(stderr, "Unexpected EOF\n");
+ goto done;
+ }
+
+ if (nread > 0) {
+ err = -1;
+ fprintf(stderr, "Unexpected extra pprint output: %s\n", line);
+ goto done;
+ }
+
+ err = 0;
+
+done:
+ if (!err)
+ fprintf(stderr, "OK\n");
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "%s\n", btf_log_buf);
+ if (btf_fd != -1)
+ close(btf_fd);
+ if (map_fd != -1)
+ close(map_fd);
+ if (pin_file)
+ fclose(pin_file);
+ unlink(pin_path);
+ free(line);
+
+ return err;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+ cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
+ ARRAY_SIZE(file_tests));
+}
+
+static int parse_args(int argc, char **argv)
+{
+ const char *optstr = "lpf:r:g:";
+ int opt;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 'l':
+ args.always_log = true;
+ break;
+ case 'f':
+ args.file_test_num = atoi(optarg);
+ args.file_test = true;
+ break;
+ case 'r':
+ args.raw_test_num = atoi(optarg);
+ args.raw_test = true;
+ break;
+ case 'g':
+ args.get_info_test_num = atoi(optarg);
+ args.get_info_test = true;
+ break;
+ case 'p':
+ args.pprint_test = true;
+ break;
+ case 'h':
+ usage(argv[0]);
+ exit(0);
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ if (args.raw_test_num &&
+ (args.raw_test_num < 1 ||
+ args.raw_test_num > ARRAY_SIZE(raw_tests))) {
+ fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
+ ARRAY_SIZE(raw_tests));
+ return -1;
+ }
+
+ if (args.file_test_num &&
+ (args.file_test_num < 1 ||
+ args.file_test_num > ARRAY_SIZE(file_tests))) {
+ fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
+ ARRAY_SIZE(file_tests));
+ return -1;
+ }
+
+ if (args.get_info_test_num &&
+ (args.get_info_test_num < 1 ||
+ args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
+ fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
+ ARRAY_SIZE(get_info_tests));
+ return -1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int err = 0;
+
+ err = parse_args(argc, argv);
+ if (err)
+ return err;
+
+ if (args.always_log)
+ libbpf_set_print(__base_pr, __base_pr, __base_pr);
+
+ if (args.raw_test)
+ err |= test_raw();
+
+ if (args.get_info_test)
+ err |= test_get_info();
+
+ if (args.file_test)
+ err |= test_file();
+
+ if (args.pprint_test)
+ err |= test_pprint();
+
+ if (args.raw_test || args.get_info_test || args.file_test ||
+ args.pprint_test)
+ return err;
+
+ err |= test_raw();
+ err |= test_get_info();
+ err |= test_file();
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 000000000000..8c7ca096ecf2
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+typedef int btf_map_key;
+typedef struct ipv_counts btf_map_value;
+btf_map_key dumm_key;
+btf_map_value dummy_value;
+
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ struct ipv_counts *counts;
+ int key = 0;
+
+ if (!arg->sock)
+ return 0;
+
+ counts = bpf_map_lookup_elem(&btf_map, &key);
+ if (!counts)
+ return 0;
+
+ counts->v6++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 000000000000..0ed8e088eebf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ struct ipv_counts *counts;
+ int key = 0;
+
+ if (!arg->sock)
+ return 0;
+
+ counts = bpf_map_lookup_elem(&btf_map, &key);
+ if (!counts)
+ return 0;
+
+ counts->v6++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 4123d0ab90ba..fac581f1c57f 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -166,6 +166,37 @@ out:
bpf_object__close(obj);
}
+static void test_xdp_adjust_tail(void)
+{
+ const char *file = "./test_adjust_tail.o";
+ struct bpf_object *obj;
+ char buf[128];
+ __u32 duration, retval, size;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+
+ CHECK(err || errno || retval != XDP_DROP,
+ "ipv4", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != XDP_TX || size != 54,
+ "ipv6", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+ bpf_object__close(obj);
+}
+
+
+
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
@@ -1177,6 +1208,7 @@ int main(void)
{
test_pkt_access();
test_xdp();
+ test_xdp_adjust_tail();
test_l4lb_all();
test_xdp_noinline();
test_tcp_estats();
diff --git a/samples/sockmap/sockmap_user.c b/tools/testing/selftests/bpf/test_sockmap.c
index 6f2334912283..29c022d23f4e 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1,14 +1,5 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
@@ -25,6 +16,7 @@
#include <fcntl.h>
#include <sys/wait.h>
#include <time.h>
+#include <sched.h>
#include <sys/time.h>
#include <sys/resource.h>
@@ -41,19 +33,31 @@
#include <getopt.h>
-#include "../bpf/bpf_load.h"
-#include "../bpf/bpf_util.h"
-#include "../bpf/libbpf.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
int running;
-void running_handler(int a);
+static void running_handler(int a);
/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001
+#define BPF_FILENAME "test_sockmap_kern.o"
+#define CG_PATH "/sockmap"
+
/* global sockets */
int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
int txmsg_pass;
int txmsg_noisy;
@@ -107,7 +111,7 @@ static void usage(char *argv[])
printf("\n");
}
-static int sockmap_init_sockets(void)
+static int sockmap_init_sockets(int verbose)
{
int i, err, one = 1;
struct sockaddr_in addr;
@@ -207,9 +211,11 @@ static int sockmap_init_sockets(void)
return errno;
}
- printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
- printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
- c1, s1, c2, s2);
+ if (verbose) {
+ printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+ printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+ c1, s1, c2, s2);
+ }
return 0;
}
@@ -226,6 +232,9 @@ struct sockmap_options {
bool sendpage;
bool data_test;
bool drop_expected;
+ int iov_count;
+ int iov_length;
+ int rate;
};
static int msg_loop_sendpage(int fd, int iov_length, int cnt,
@@ -324,17 +333,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
int slct, recv, max_fd = fd;
+ int fd_flags = O_NONBLOCK;
struct timeval timeout;
float total_bytes;
fd_set w;
+ fcntl(fd, fd_flags);
total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
perror("recv start time: ");
while (s->bytes_recvd < total_bytes) {
- timeout.tv_sec = 1;
- timeout.tv_usec = 0;
+ timeout.tv_sec = 0;
+ timeout.tv_usec = 10;
/* FD sets */
FD_ZERO(&w);
@@ -346,7 +357,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
clock_gettime(CLOCK_MONOTONIC, &s->end);
goto out_errno;
} else if (!slct) {
- fprintf(stderr, "unexpected timeout\n");
+ if (opt->verbose)
+ fprintf(stderr, "unexpected timeout\n");
errno = -EIO;
clock_gettime(CLOCK_MONOTONIC, &s->end);
goto out_errno;
@@ -409,12 +421,14 @@ static inline float recvdBps(struct msg_stats s)
return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
}
-static int sendmsg_test(int iov_count, int iov_buf, int cnt,
- struct sockmap_options *opt)
+static int sendmsg_test(struct sockmap_options *opt)
{
float sent_Bps = 0, recvd_Bps = 0;
int rx_fd, txpid, rxpid, err = 0;
struct msg_stats s = {0};
+ int iov_count = opt->iov_count;
+ int iov_buf = opt->iov_length;
+ int cnt = opt->rate;
int status;
errno = 0;
@@ -433,7 +447,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
iov_count = 1;
err = msg_loop(rx_fd, iov_count, iov_buf,
cnt, &s, false, opt);
- if (err)
+ if (err && opt->verbose)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
@@ -443,10 +457,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
- fprintf(stdout,
- "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
- s.bytes_sent, sent_Bps, sent_Bps/giga,
- s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ if (opt->verbose)
+ fprintf(stdout,
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
exit(1);
} else if (rxpid == -1) {
perror("msg_loop_rx: ");
@@ -470,10 +485,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
- fprintf(stdout,
- "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
- s.bytes_sent, sent_Bps, sent_Bps/giga,
- s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ if (opt->verbose)
+ fprintf(stdout,
+ "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
exit(1);
} else if (txpid == -1) {
perror("msg_loop_tx: ");
@@ -568,102 +584,9 @@ enum {
SENDPAGE,
};
-int main(int argc, char **argv)
+static int run_options(struct sockmap_options *options, int cg_fd, int test)
{
- int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
- int opt, longindex, err, cg_fd = 0;
- struct sockmap_options options = {0};
- int test = PING_PONG;
- char filename[256];
-
- while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
- long_options, &longindex)) != -1) {
- switch (opt) {
- case 's':
- txmsg_start = atoi(optarg);
- break;
- case 'e':
- txmsg_end = atoi(optarg);
- break;
- case 'a':
- txmsg_apply = atoi(optarg);
- break;
- case 'k':
- txmsg_cork = atoi(optarg);
- break;
- case 'c':
- cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, optarg);
- return cg_fd;
- }
- break;
- case 'r':
- rate = atoi(optarg);
- break;
- case 'v':
- options.verbose = 1;
- break;
- case 'i':
- iov_count = atoi(optarg);
- break;
- case 'l':
- length = atoi(optarg);
- break;
- case 'd':
- options.data_test = true;
- break;
- case 't':
- if (strcmp(optarg, "ping") == 0) {
- test = PING_PONG;
- } else if (strcmp(optarg, "sendmsg") == 0) {
- test = SENDMSG;
- } else if (strcmp(optarg, "base") == 0) {
- test = BASE;
- } else if (strcmp(optarg, "base_sendpage") == 0) {
- test = BASE_SENDPAGE;
- } else if (strcmp(optarg, "sendpage") == 0) {
- test = SENDPAGE;
- } else {
- usage(argv);
- return -1;
- }
- break;
- case 0:
- break;
- case 'h':
- default:
- usage(argv);
- return -1;
- }
- }
-
- if (!cg_fd) {
- fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
- argv[0]);
- return -1;
- }
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- running = 1;
-
- /* catch SIGINT */
- signal(SIGINT, running_handler);
-
- if (load_bpf_file(filename)) {
- fprintf(stderr, "load_bpf_file: (%s) %s\n",
- filename, strerror(errno));
- return 1;
- }
+ int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
/* If base test skip BPF setup */
if (test == BASE || test == BASE_SENDPAGE)
@@ -673,8 +596,9 @@ int main(int argc, char **argv)
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
- err, strerror(errno));
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[0], err, strerror(errno));
return err;
}
@@ -695,7 +619,7 @@ int main(int argc, char **argv)
}
run:
- err = sockmap_init_sockets();
+ err = sockmap_init_sockets(options->verbose);
if (err) {
fprintf(stderr, "ERROR: test socket failed: %d\n", err);
goto out;
@@ -729,7 +653,7 @@ run:
fprintf(stderr,
"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
err, strerror(errno));
- return err;
+ goto out;
}
err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
@@ -737,7 +661,7 @@ run:
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
err, strerror(errno));
- return err;
+ goto out;
}
if (txmsg_redir || txmsg_redir_noisy)
@@ -750,7 +674,7 @@ run:
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
err, strerror(errno));
- return err;
+ goto out;
}
if (txmsg_apply) {
@@ -760,7 +684,7 @@ run:
fprintf(stderr,
"ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
err, strerror(errno));
- return err;
+ goto out;
}
}
@@ -771,7 +695,7 @@ run:
fprintf(stderr,
"ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
err, strerror(errno));
- return err;
+ goto out;
}
}
@@ -782,7 +706,7 @@ run:
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
err, strerror(errno));
- return err;
+ goto out;
}
}
@@ -794,7 +718,7 @@ run:
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
err, strerror(errno));
- return err;
+ goto out;
}
}
@@ -832,11 +756,13 @@ run:
}
if (txmsg_skb) {
- int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
+ int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+ p2 : p1;
int ingress = BPF_F_INGRESS;
i = 0;
- err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
@@ -844,7 +770,8 @@ run:
}
i = 3;
- err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
+ err = bpf_map_update_elem(map_fd[0],
+ &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -854,36 +781,679 @@ run:
}
if (txmsg_drop)
- options.drop_expected = true;
+ options->drop_expected = true;
if (test == PING_PONG)
- err = forever_ping_pong(rate, &options);
+ err = forever_ping_pong(options->rate, options);
else if (test == SENDMSG) {
- options.base = false;
- options.sendpage = false;
- err = sendmsg_test(iov_count, length, rate, &options);
+ options->base = false;
+ options->sendpage = false;
+ err = sendmsg_test(options);
} else if (test == SENDPAGE) {
- options.base = false;
- options.sendpage = true;
- err = sendmsg_test(iov_count, length, rate, &options);
+ options->base = false;
+ options->sendpage = true;
+ err = sendmsg_test(options);
} else if (test == BASE) {
- options.base = true;
- options.sendpage = false;
- err = sendmsg_test(iov_count, length, rate, &options);
+ options->base = true;
+ options->sendpage = false;
+ err = sendmsg_test(options);
} else if (test == BASE_SENDPAGE) {
- options.base = true;
- options.sendpage = true;
- err = sendmsg_test(iov_count, length, rate, &options);
+ options->base = true;
+ options->sendpage = true;
+ err = sendmsg_test(options);
} else
fprintf(stderr, "unknown test\n");
out:
+ /* Detatch and zero all the maps */
bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ if (tx_prog_fd >= 0)
+ bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+ for (i = 0; i < 8; i++) {
+ key = next_key = 0;
+ bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+ while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+ bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+ key = next_key;
+ }
+ }
+
close(s1);
close(s2);
close(p1);
close(p2);
close(c1);
close(c2);
+ return err;
+}
+
+static char *test_to_str(int test)
+{
+ switch (test) {
+ case SENDMSG:
+ return "sendmsg";
+ case SENDPAGE:
+ return "sendpage";
+ }
+ return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+ memset(options, 0, OPTSTRING);
+
+ if (txmsg_pass)
+ strncat(options, "pass,", OPTSTRING);
+ if (txmsg_noisy)
+ strncat(options, "pass_noisy,", OPTSTRING);
+ if (txmsg_redir)
+ strncat(options, "redir,", OPTSTRING);
+ if (txmsg_redir_noisy)
+ strncat(options, "redir_noisy,", OPTSTRING);
+ if (txmsg_drop)
+ strncat(options, "drop,", OPTSTRING);
+ if (txmsg_apply)
+ strncat(options, "apply,", OPTSTRING);
+ if (txmsg_cork)
+ strncat(options, "cork,", OPTSTRING);
+ if (txmsg_start)
+ strncat(options, "start,", OPTSTRING);
+ if (txmsg_end)
+ strncat(options, "end,", OPTSTRING);
+ if (txmsg_ingress)
+ strncat(options, "ingress,", OPTSTRING);
+ if (txmsg_skb)
+ strncat(options, "skb,", OPTSTRING);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+ char *options = calloc(60, sizeof(char));
+ int err;
+
+ if (test == SENDPAGE)
+ opt->sendpage = true;
+ else
+ opt->sendpage = false;
+
+ if (txmsg_drop)
+ opt->drop_expected = true;
+ else
+ opt->drop_expected = false;
+
+ test_options(options);
+
+ fprintf(stdout,
+ "[TEST %i]: (%i, %i, %i, %s, %s): ",
+ test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+ test_to_str(test), options);
+ fflush(stdout);
+ err = run_options(opt, cgrp, test);
+ fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+ test_cnt++;
+ !err ? passed++ : failed++;
+ free(options);
+ return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+ int err = __test_exec(cgrp, SENDMSG, opt);
+
+ if (err)
+ goto out;
+
+ err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+ return err;
+}
+
+static int test_loop(int cgrp)
+{
+ struct sockmap_options opt;
+
+ int err, i, l, r;
+
+ opt.verbose = 0;
+ opt.base = false;
+ opt.sendpage = false;
+ opt.data_test = false;
+ opt.drop_expected = false;
+ opt.iov_count = 0;
+ opt.iov_length = 0;
+ opt.rate = 0;
+
+ r = 1;
+ for (i = 1; i < 100; i += 33) {
+ for (l = 1; l < 100; l += 33) {
+ opt.rate = r;
+ opt.iov_count = i;
+ opt.iov_length = l;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+ }
+ sched_yield();
+out:
+ return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+ int err;
+
+ txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_ingress = txmsg_skb = 0;
+
+ txmsg_pass = 1;
+ err = test_loop(cgrp);
+ txmsg_pass = 0;
+ if (err)
+ goto out;
+
+ txmsg_redir = 1;
+ err = test_loop(cgrp);
+ txmsg_redir = 0;
+ if (err)
+ goto out;
+
+ txmsg_drop = 1;
+ err = test_loop(cgrp);
+ txmsg_drop = 0;
+ if (err)
+ goto out;
+
+ txmsg_redir = 1;
+ txmsg_ingress = 1;
+ err = test_loop(cgrp);
+ txmsg_redir = 0;
+ txmsg_ingress = 0;
+ if (err)
+ goto out;
+out:
+ txmsg_pass = 0;
+ txmsg_redir = 0;
+ txmsg_drop = 0;
+ return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+ int err;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1024;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1024;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1;
+ opt->rate = 1024;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 256;
+ opt->iov_count = 1024;
+ opt->rate = 10;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->rate = 100;
+ opt->iov_count = 1;
+ opt->iov_length = 5;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+out:
+ sched_yield();
+ return err;
+}
+
+static int test_mixed(int cgrp)
+{
+ struct sockmap_options opt = {0};
+ int err;
+
+ txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_start = txmsg_end = 0;
+ /* Test small and large iov_count values with pass/redir/apply/cork */
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_cork = 4096;
+ txmsg_apply = 4096;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 0;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_cork = 4096;
+ txmsg_apply = 4096;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+out:
+ return err;
+}
+
+static int test_start_end(int cgrp)
+{
+ struct sockmap_options opt = {0};
+ int err, i;
+
+ /* Test basic start/end with lots of iov_count and iov_lengths */
+ txmsg_start = 1;
+ txmsg_end = 2;
+ err = test_txmsg(cgrp);
+ if (err)
+ goto out;
+
+ /* Test start/end with cork */
+ opt.rate = 16;
+ opt.iov_count = 1;
+ opt.iov_length = 100;
+ txmsg_cork = 1600;
+
+ for (i = 99; i <= 1600; i += 500) {
+ txmsg_start = 0;
+ txmsg_end = i;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+
+ /* Test start/end with cork but pull data in middle */
+ for (i = 199; i <= 1600; i += 500) {
+ txmsg_start = 100;
+ txmsg_end = i;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+
+ /* Test start/end with cork pulling last sg entry */
+ txmsg_start = 1500;
+ txmsg_end = 1600;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end pull of single byte in last page */
+ txmsg_start = 1111;
+ txmsg_end = 1112;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with end < start */
+ txmsg_start = 1111;
+ txmsg_end = 0;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with end > data */
+ txmsg_start = 0;
+ txmsg_end = 1601;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with start > data */
+ txmsg_start = 1601;
+ txmsg_end = 1600;
+ err = test_exec(cgrp, &opt);
+
+out:
+ txmsg_start = 0;
+ txmsg_end = 0;
+ sched_yield();
+ return err;
+}
+
+char *map_names[] = {
+ "sock_map",
+ "sock_map_txmsg",
+ "sock_map_redir",
+ "sock_apply_bytes",
+ "sock_cork_bytes",
+ "sock_pull_bytes",
+ "sock_redir_flags",
+ "sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+};
+
+static int populate_progs(void)
+{
+ char *bpf_file = BPF_FILENAME;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int i = 0;
+ long err;
+
+ obj = bpf_object__open(bpf_file);
+ err = libbpf_get_error(obj);
+ if (err) {
+ char err_buf[256];
+
+ libbpf_strerror(err, err_buf, sizeof(err_buf));
+ printf("Unable to load eBPF objects in file '%s' : %s\n",
+ bpf_file, err_buf);
+ return -1;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ bpf_program__set_type(prog, prog_type[i]);
+ bpf_program__set_expected_attach_type(prog,
+ prog_attach_type[i]);
+ i++;
+ }
+
+ i = bpf_object__load(obj);
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ prog_fd[i] = bpf_program__fd(prog);
+ i++;
+ }
+
+ for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+ maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+ map_fd[i] = bpf_map__fd(maps[i]);
+ if (map_fd[i] < 0) {
+ fprintf(stderr, "load_bpf_file: (%i) %s\n",
+ map_fd[i], strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int test_suite(void)
+{
+ int cg_fd, err;
+
+ err = populate_progs();
+ if (err < 0) {
+ fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+ return err;
+ }
+
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "ERROR: cgroup env failed\n");
+ return -EINVAL;
+ }
+
+ cg_fd = create_and_get_cgroup(CG_PATH);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+
+ /* Tests basic commands and APIs with range of iov values */
+ txmsg_start = txmsg_end = 0;
+ err = test_txmsg(cg_fd);
+ if (err)
+ goto out;
+
+ /* Tests interesting combinations of APIs used together */
+ err = test_mixed(cg_fd);
+ if (err)
+ goto out;
+
+ /* Tests pull_data API using start/end API */
+ err = test_start_end(cg_fd);
+ if (err)
+ goto out;
+
+out:
+ printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+ close(cg_fd);
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ int iov_count = 1, length = 1024, rate = 1;
+ struct sockmap_options options = {0};
+ int opt, longindex, err, cg_fd = 0;
+ char *bpf_file = BPF_FILENAME;
+ int test = PING_PONG;
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (argc < 2)
+ return test_suite();
+
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
+ case 'c':
+ cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+ break;
+ case 'r':
+ rate = atoi(optarg);
+ break;
+ case 'v':
+ options.verbose = 1;
+ break;
+ case 'i':
+ iov_count = atoi(optarg);
+ break;
+ case 'l':
+ length = atoi(optarg);
+ break;
+ case 'd':
+ options.data_test = true;
+ break;
+ case 't':
+ if (strcmp(optarg, "ping") == 0) {
+ test = PING_PONG;
+ } else if (strcmp(optarg, "sendmsg") == 0) {
+ test = SENDMSG;
+ } else if (strcmp(optarg, "base") == 0) {
+ test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
+ } else {
+ usage(argv);
+ return -1;
+ }
+ break;
+ case 0:
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return -1;
+ }
+ }
+
+ if (!cg_fd) {
+ fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+ argv[0]);
+ return -1;
+ }
+
+ err = populate_progs();
+ if (err) {
+ fprintf(stderr, "populate program: (%s) %s\n",
+ bpf_file, strerror(errno));
+ return 1;
+ }
+ running = 1;
+
+ /* catch SIGINT */
+ signal(SIGINT, running_handler);
+
+ options.iov_count = iov_count;
+ options.iov_length = length;
+ options.rate = rate;
+
+ err = run_options(&options, cg_fd, test);
close(cg_fd);
return err;
}
diff --git a/samples/sockmap/sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
index 9ff8bc5dc206..33de97e2b6b6 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -1,20 +1,19 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include "../../tools/testing/selftests/bpf/bpf_helpers.h"
-#include "../../tools/testing/selftests/bpf/bpf_endian.h"
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
/* Sockmap sample program connects a client and a backend together
* using cgroups.
@@ -337,5 +336,5 @@ int bpf_prog10(struct sk_msg_md *msg)
return SK_DROP;
}
-
+int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000000..aeb2901f21f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,729 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+# The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+# root namespace | at_ns0 namespace
+# |
+# ----------- | -----------
+# | tnl dev | | | tnl dev | (overlay network)
+# ----------- | -----------
+# metadata-mode | native-mode
+# with bpf |
+# |
+# ---------- | ----------
+# | veth1 | --------- | veth0 | (underlay network)
+# ---------- peer ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start by namespace creation, device configuration,
+# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
+# from root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+# with remote_ip=172.16.1.200 and others.
+# 3) Outer tunnel header is prepended and route the packet to veth1's egress
+# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
+# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
+# 6) Forward the packet to the overlay tnl dev
+
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+ ip netns add at_ns0
+ ip link add veth0 type veth peer name veth1
+ ip link set veth0 netns at_ns0
+ ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip link set dev veth1 up mtu 1500
+ ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE key 2 external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+ local ::11 remote ::22
+
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip addr add dev $DEV fc80::200/24
+ ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+ # at_ns0 namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 3
+ fi
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 7
+ fi
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+ # Set static ARP entry here because iptables set-mark works
+ # on L3 packet, as a result not applying to ARP packets,
+ # causing errors at get_tunnel_{key/opt}.
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ id 2 dstport 4789 gbp remote 172.16.1.200
+ ip netns exec at_ns0 \
+ ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+ ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external gbp dstport 4789
+ ip link set dev $DEV address 52:54:00:d9:02:00 up
+ ip addr add dev $DEV 10.1.1.200/24
+ arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+ #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+ ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ #ip -4 addr del 172.16.1.200 dev veth1
+ ip -6 addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+ local ::11 remote ::22
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external dstport 4789
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ id 2 dstport 6081 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE dstport 6081 external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE id 22 \
+ remote ::22 # geneve has no local option
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ local 172.16.1.100 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ local ::11 remote ::22
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+test_gre()
+{
+ TYPE=gretap
+ DEV_NS=gretap00
+ DEV=gretap11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_gre_tunnel
+ attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+ TYPE=ip6gre
+ DEV_NS=ip6gre00
+ DEV=ip6gre11
+ ret=0
+
+ check $TYPE
+ config_device
+ # reuse the ip6gretap function
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+ TYPE=ip6gretap
+ DEV_NS=ip6gretap00
+ DEV=ip6gretap11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+ TYPE=erspan
+ DEV_NS=erspan00
+ DEV=erspan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_erspan_tunnel $1
+ attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+ TYPE=ip6erspan
+ DEV_NS=ip6erspan00
+ DEV=ip6erspan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6erspan_tunnel $1
+ attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+ ping6 $PING_ARG ::11
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+ TYPE=vxlan
+ DEV_NS=vxlan00
+ DEV=vxlan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_vxlan_tunnel
+ attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+ TYPE=vxlan
+ DEV_NS=ip6vxlan00
+ DEV=ip6vxlan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6vxlan_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip4 over ip6
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+ TYPE=geneve
+ DEV_NS=geneve00
+ DEV=geneve11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_geneve_tunnel
+ attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+ TYPE=geneve
+ DEV_NS=ip6geneve00
+ DEV=ip6geneve11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6geneve_tunnel
+ attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+ TYPE=ipip
+ DEV_NS=ipip00
+ DEV=ipip11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ipip_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+ TYPE=ip6tnl
+ DEV_NS=ipip6tnl00
+ DEV=ipip6tnl11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ipip6tnl_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip4 over ip6
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+ auth=0x$(printf '1%.0s' {1..40})
+ enc=0x$(printf '2%.0s' {1..32})
+ spi_in_to_out=0x1
+ spi_out_to_in=0x2
+ # at_ns0 namespace
+ # at_ns0 -> root
+ ip netns exec at_ns0 \
+ ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+ spi $spi_in_to_out reqid 1 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip netns exec at_ns0 \
+ ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+ tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+ mode tunnel
+ # root -> at_ns0
+ ip netns exec at_ns0 \
+ ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+ spi $spi_out_to_in reqid 2 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip netns exec at_ns0 \
+ ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+ tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+ mode tunnel
+ # address & route
+ ip netns exec at_ns0 \
+ ip addr add dev veth0 10.1.1.100/32
+ ip netns exec at_ns0 \
+ ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+ src 10.1.1.100
+
+ # root namespace
+ # at_ns0 -> root
+ ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+ spi $spi_in_to_out reqid 1 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+ tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+ mode tunnel
+ # root -> at_ns0
+ ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+ spi $spi_out_to_in reqid 2 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+ tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+ mode tunnel
+ # address & route
+ ip addr add dev veth1 10.1.1.200/32
+ ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+ config_device
+ #tcpdump -nei veth1 ip &
+ output=$(mktemp)
+ cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
+ setup_xfrm_tunnel
+ tc qdisc add dev veth1 clsact
+ tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+ sec xfrm_get_state
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ sleep 1
+ grep "reqid 1" $output
+ check_err $?
+ grep "spi 0x1" $output
+ check_err $?
+ grep "remote ip 0xac100164" $output
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+ DEV=$1
+ SET=$2
+ GET=$3
+ tc qdisc add dev $DEV clsact
+ tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+ tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+ ip netns delete at_ns0 2> /dev/null
+ ip link del veth1 2> /dev/null
+ ip link del ipip11 2> /dev/null
+ ip link del ipip6tnl11 2> /dev/null
+ ip link del gretap11 2> /dev/null
+ ip link del ip6gre11 2> /dev/null
+ ip link del ip6gretap11 2> /dev/null
+ ip link del vxlan11 2> /dev/null
+ ip link del ip6vxlan11 2> /dev/null
+ ip link del geneve11 2> /dev/null
+ ip link del ip6geneve11 2> /dev/null
+ ip link del erspan11 2> /dev/null
+ ip link del ip6erspan11 2> /dev/null
+}
+
+cleanup_exit()
+{
+ echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+ cleanup
+ exit 0
+}
+
+check()
+{
+ ip link help $1 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP $1: iproute2 not support"
+ cleanup
+ return 1
+ fi
+}
+
+enable_debug()
+{
+ echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
+bpf_tunnel_test()
+{
+ echo "Testing GRE tunnel..."
+ test_gre
+ echo "Testing IP6GRE tunnel..."
+ test_ip6gre
+ echo "Testing IP6GRETAP tunnel..."
+ test_ip6gretap
+ echo "Testing ERSPAN tunnel..."
+ test_erspan v2
+ echo "Testing IP6ERSPAN tunnel..."
+ test_ip6erspan v2
+ echo "Testing VXLAN tunnel..."
+ test_vxlan
+ echo "Testing IP6VXLAN tunnel..."
+ test_ip6vxlan
+ echo "Testing GENEVE tunnel..."
+ test_geneve
+ echo "Testing IP6GENEVE tunnel..."
+ test_ip6geneve
+ echo "Testing IPIP tunnel..."
+ test_ipip
+ echo "Testing IPIP6 tunnel..."
+ test_ipip6
+ echo "Testing IPSec tunnel..."
+ test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/samples/bpf/tcbpf2_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
index 9a8db7bd6db4..504df69c83df 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2016 VMware
* Copyright (c) 2016 Facebook
*
@@ -5,39 +6,41 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/filter.h>
-#include <uapi/linux/pkt_cls.h>
-#include <uapi/linux/erspan.h>
-#include <net/ipv6.h>
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
-#define _htonl __builtin_bswap32
#define ERROR(ret) do {\
char fmt[] = "ERROR line:%d ret:%d\n";\
bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
- } while(0)
+ } while (0)
+
+int _version SEC("version") = 1;
struct geneve_opt {
__be16 opt_class;
- u8 type;
- u8 length:5;
- u8 r3:1;
- u8 r2:1;
- u8 r1:1;
- u8 opt_data[8]; /* hard-coded to 8 byte */
+ __u8 type;
+ __u8 length:5;
+ __u8 r3:1;
+ __u8 r2:1;
+ __u8 r1:1;
+ __u8 opt_data[8]; /* hard-coded to 8 byte */
};
struct vxlan_metadata {
- u32 gbp;
+ __u32 gbp;
};
SEC("gre_set_tunnel")
@@ -86,7 +89,7 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
int ret;
__builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = _htonl(0x11); /* ::11 */
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
key.tunnel_id = 2;
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
@@ -136,7 +139,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -147,8 +151,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
md.version = 1;
md.u.index = bpf_htonl(123);
#else
- u8 direction = 1;
- u8 hwid = 7;
+ __u8 direction = 1;
+ __u8 hwid = 7;
md.version = 2;
md.u.md2.dir = direction;
@@ -171,7 +175,7 @@ int _erspan_get_tunnel(struct __sk_buff *skb)
char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
- u32 index;
+ __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
@@ -214,7 +218,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
int ret;
__builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = _htonl(0x11);
+ key.remote_ipv6[3] = bpf_htonl(0x11);
key.tunnel_id = 2;
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
@@ -229,11 +233,11 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
__builtin_memset(&md, 0, sizeof(md));
#ifdef ERSPAN_V1
- md.u.index = htonl(123);
+ md.u.index = bpf_htonl(123);
md.version = 1;
#else
- u8 direction = 0;
- u8 hwid = 17;
+ __u8 direction = 0;
+ __u8 hwid = 17;
md.version = 2;
md.u.md2.dir = direction;
@@ -256,10 +260,11 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
- u32 index;
+ __u32 index;
int ret;
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -304,7 +309,8 @@ int _vxlan_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -346,6 +352,48 @@ int _vxlan_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
SEC("geneve_set_tunnel")
int _geneve_set_tunnel(struct __sk_buff *skb)
{
@@ -360,15 +408,16 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
key.tunnel_ttl = 64;
__builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */
+ gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
gopt.type = 0x08;
gopt.r1 = 0;
gopt.r2 = 0;
gopt.r3 = 0;
gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = 0xdeadbeef;
+ *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -408,6 +457,71 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&gopt, 0x0, sizeof(gopt));
+ gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt.type = 0x08;
+ gopt.r1 = 0;
+ gopt.r2 = 0;
+ gopt.r3 = 0;
+ gopt.length = 2; /* 4-byte multiple */
+ *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+ ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+ return TC_ACT_OK;
+}
+
SEC("ipip_set_tunnel")
int _ipip_set_tunnel(struct __sk_buff *skb)
{
@@ -431,9 +545,9 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
return TC_ACT_SHOT;
- if (tcp->dest == htons(5200))
+ if (tcp->dest == bpf_htons(5200))
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- else if (tcp->dest == htons(5201))
+ else if (tcp->dest == bpf_htons(5201))
key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
else
return TC_ACT_SHOT;
@@ -481,28 +595,12 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- key.remote_ipv6[0] = _htonl(0x2401db00);
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
key.tunnel_ttl = 64;
- if (iph->protocol == IPPROTO_ICMP) {
- key.remote_ipv6[3] = _htonl(1);
- } else {
- if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) {
- ERROR(iph->protocol);
- return TC_ACT_SHOT;
- }
-
- if (tcp->dest == htons(5200)) {
- key.remote_ipv6[3] = _htonl(1);
- } else if (tcp->dest == htons(5201)) {
- key.remote_ipv6[3] = _htonl(2);
- } else {
- ERROR(tcp->dest);
- return TC_ACT_SHOT;
- }
- }
-
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -518,14 +616,15 @@ int _ipip6_get_tunnel(struct __sk_buff *skb)
struct bpf_tunnel_key key;
char fmt[] = "remote ip6 %x::%x\n";
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
- _htonl(key.remote_ipv6[3]));
+ bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
return TC_ACT_OK;
}
@@ -545,28 +644,29 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- key.remote_ipv6[0] = _htonl(0x2401db00);
+ key.remote_ipv6[0] = bpf_htonl(0x2401db00);
key.tunnel_ttl = 64;
- if (iph->nexthdr == NEXTHDR_ICMP) {
- key.remote_ipv6[3] = _htonl(1);
+ if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+ key.remote_ipv6[3] = bpf_htonl(1);
} else {
- if (iph->nexthdr != NEXTHDR_TCP) {
+ if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
ERROR(iph->nexthdr);
return TC_ACT_SHOT;
}
- if (tcp->dest == htons(5200)) {
- key.remote_ipv6[3] = _htonl(1);
- } else if (tcp->dest == htons(5201)) {
- key.remote_ipv6[3] = _htonl(2);
+ if (tcp->dest == bpf_htons(5200)) {
+ key.remote_ipv6[3] = bpf_htonl(1);
+ } else if (tcp->dest == bpf_htons(5201)) {
+ key.remote_ipv6[3] = bpf_htonl(2);
} else {
ERROR(tcp->dest);
return TC_ACT_SHOT;
}
}
- ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
@@ -582,14 +682,31 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
struct bpf_tunnel_key key;
char fmt[] = "remote ip6 %x::%x\n";
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
if (ret < 0) {
ERROR(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
- _htonl(key.remote_ipv6[3]));
+ bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
+ return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_state x;
+ char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+ int ret;
+
+ ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+ if (ret < 0)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+ bpf_ntohl(x.remote_ipv4));
return TC_ACT_OK;
}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3e7718b1a9ae..165e9ddfa446 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -64,6 +64,7 @@ struct bpf_test {
struct bpf_insn insns[MAX_INSNS];
int fixup_map1[MAX_FIXUPS];
int fixup_map2[MAX_FIXUPS];
+ int fixup_map3[MAX_FIXUPS];
int fixup_prog[MAX_FIXUPS];
int fixup_map_in_map[MAX_FIXUPS];
const char *errstr;
@@ -88,6 +89,11 @@ struct test_val {
int foo[MAX_ENTRIES];
};
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
static struct bpf_test tests[] = {
{
"add+sub+mul",
@@ -5594,6 +5600,257 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
+ "map lookup helper access to map",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 8 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map update helper access to map",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map update helper access to map: wrong size",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .fixup_map3 = { 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=0 size=16",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ offsetof(struct other_val, bar)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm): out-of-bound 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ sizeof(struct other_val) - 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm): out-of-bound 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct other_val, bar)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg): out-of-bound 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ sizeof(struct other_val) - 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg): out-of-bound 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, -4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct other_val, bar), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable): no max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable): wrong max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct other_val, bar) + 1, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 11 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=9 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
"map element value is preserved across register spilling",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -11533,6 +11790,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
{
int *fixup_map1 = test->fixup_map1;
int *fixup_map2 = test->fixup_map2;
+ int *fixup_map3 = test->fixup_map3;
int *fixup_prog = test->fixup_prog;
int *fixup_map_in_map = test->fixup_map_in_map;
@@ -11556,6 +11814,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
} while (*fixup_map2);
}
+ if (*fixup_map3) {
+ map_fds[1] = create_map(sizeof(struct other_val), 1);
+ do {
+ prog[*fixup_map3].imm = map_fds[1];
+ fixup_map3++;
+ } while (*fixup_map3);
+ }
+
if (*fixup_prog) {
map_fds[2] = create_prog_array();
do {
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c612d6e38c62..f0e6c35a93ae 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -7,3 +7,7 @@ reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
reuseaddr_conflict
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index daf5effec3f0..902820d3e848 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,18 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh
TEST_GEN_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
+TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx
include ../lib.mk
$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 75d922438bc9..d8313d0438b7 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -75,14 +76,31 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1cddf06f691d..c15c6c85c984 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
NUM_NETIFS=4
source lib.sh
@@ -73,14 +74,31 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1ac6c62271f3..91041c49655b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -321,6 +321,25 @@ simple_if_fini()
vrf_destroy $vrf_name
}
+tunnel_create()
+{
+ local name=$1; shift
+ local type=$1; shift
+ local local=$1; shift
+ local remote=$1; shift
+
+ ip link add name $name type $type \
+ local $local remote $remote "$@"
+ ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+ local name=$1; shift
+
+ ip link del dev $name
+}
+
master_name_get()
{
local if_name=$1
@@ -335,6 +354,15 @@ link_stats_tx_packets_get()
ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
}
+tc_rule_stats_get()
+{
+ local dev=$1; shift
+ local pref=$1; shift
+
+ tc -j -s filter show dev $dev ingress pref $pref |
+ jq '.[1].options.actions[].stats.packets'
+}
+
mac_get()
{
local if_name=$1
@@ -353,19 +381,33 @@ bridge_ageing_time_get()
echo $((ageing_time / 100))
}
-forwarding_enable()
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+ local key=$1; shift
+ local value=$1; shift
+
+ SYSCTL_ORIG[$key]=$(sysctl -n $key)
+ sysctl -qw $key=$value
+}
+
+sysctl_restore()
{
- ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
- ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+ local key=$1; shift
- sysctl -q -w net.ipv4.conf.all.forwarding=1
- sysctl -q -w net.ipv6.conf.all.forwarding=1
+ sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+ sysctl_set net.ipv4.conf.all.forwarding 1
+ sysctl_set net.ipv6.conf.all.forwarding 1
}
forwarding_restore()
{
- sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
- sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+ sysctl_restore net.ipv6.conf.all.forwarding
+ sysctl_restore net.ipv4.conf.all.forwarding
}
tc_offload_check()
@@ -381,6 +423,83 @@ tc_offload_check()
return 0
}
+slow_path_trap_install()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ # For slow-path testing, we need to install a trap to get to
+ # slow path the packets that would otherwise be switched in HW.
+ tc filter add dev $dev $direction pref 1 \
+ flower skip_sw action trap
+ fi
+}
+
+slow_path_trap_uninstall()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ tc filter del dev $dev $direction pref 1 flower skip_sw
+ fi
+}
+
+__icmp_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local vsuf=$1; shift
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$tundev" ingress \
+ proto ip$vsuf pref $pref \
+ flower ip_proto icmp$vsuf $filter \
+ action pass
+}
+
+icmp_capture_install()
+{
+ __icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+ __icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 v6 "$@"
+}
+
+matchall_sink_create()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev $dev clsact
+ tc filter add dev $dev ingress \
+ pref 10000 \
+ matchall \
+ action drop
+}
+
+tests_run()
+{
+ local current_test
+
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
##############################################################################
# Tests
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000000..c6786d1b2b96
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_mac
+ test_ip6gretap_mac
+ test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local prot=$1; shift
+ local what=$1; shift
+
+ local swp3mac=$(mac_get $swp3)
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ tc qdisc add dev $h3 clsact
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
+ action pass
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+ tc filter del dev $h3 ingress pref 77
+ tc qdisc del dev $h3 clsact
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 egress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ fail_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_uninstall $swp1 egress
+ quick_test_span_gre_dir gt4 ingress
+ fail_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+ test_span_gre_mac gt4 ingress ip "mirror to gretap"
+ test_span_gre_mac gt4 egress ip "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+ test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
+ test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000000..360ca133bead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | +---------------------------------------------------------------------+ |
+# | | OL + gt6 (ip6gretap) + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | |
+# | | : ttl=100 : ttl=100 | |
+# | | : tos=inherit : tos=inherit | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | : | : | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | | UL : |,---------------------' | |
+# | | + $swp3 : || : | |
+# | | | 192.0.2.129/28 : vv : | |
+# | | | 2001:db8:2::1/64 : + ul (dummy) : | |
+# | +---|---------------------:----------------------:--------------------+ |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
+# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
+# the encapsulated packed should be routed.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+ # Bridge between H1 and H2.
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+
+ # Underlay.
+
+ simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link add name ul type dummy
+ ip link set dev ul master v$swp3
+ ip link set dev ul up
+
+ # Overlay.
+
+ vrf_create vrf-ol
+ ip link set dev vrf-ol up
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit dev ul
+ ip link set dev gt4 master vrf-ol
+ ip link set dev gt4 up
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit dev ul allow-localremote
+ ip link set dev gt6 master vrf-ol
+ ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+ vrf_destroy vrf-ol
+
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link del dev ul
+
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000000..50ab3462af0c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+ test_ttl
+ test_tun_up
+ test_egress_up
+ test_remote_ip
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ # This test downs $swp3, which deletes the configured IPv6 address
+ # unless this sysctl is set.
+ sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local prot=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ tc qdisc add dev $h3 clsact
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_ttl 50 action pass
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+ ip link set dev $tundev type $type ttl 50
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+ ip link set dev $tundev type $type ttl 100
+ tc filter del dev $h3 ingress pref 77
+ tc qdisc del dev $h3 clsact
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev up
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+ local tundev=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $swp3 down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ # After setting the device up, wait for neighbor to get resolved so that
+ # we can expect mirroring to work.
+ ip link set dev $swp3 up
+ while true; do
+ ip neigh sh dev $swp3 $remote_ip nud reachable |
+ grep -q ^
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local correct_ip=$1; shift
+ local wrong_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type remote $wrong_ip
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type remote $correct_ip
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: remote address change ($tcflags)"
+}
+
+test_ttl()
+{
+ test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+ test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+ test_span_gre_tun_up gt4 "mirror to gretap"
+ test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+ test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+ test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+ test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+ test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000000..2e54407d8954
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. Flower match on one of the
+# addresses is configured with mirror action. It is expected that when pinging
+# this address, mirroring takes place, whereas when pinging the other one,
+# there's no mirroring.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+
+ ip address add dev $h1 192.0.2.3/28
+ ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h2 192.0.2.4/28
+ ip address del dev $h1 192.0.2.3/28
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local match_dip=$1; shift
+ local what=$1; shift
+
+ mirror_install $swp1 $direction $tundev \
+ "protocol ip flower $tcflags dst_ip $match_dip"
+ fail_test_span_gre_dir $tundev $direction
+ test_span_gre_dir_acl "$tundev" "$direction" \
+ "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+ full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+ full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000000..207ffd167dba
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0
+
+do_test_span_gre_dir_ips()
+{
+ local expect=$1; shift
+ local tundev=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ icmp_capture_install h3-$tundev
+ mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 $expect
+ icmp_capture_uninstall h3-$tundev
+}
+
+quick_test_span_gre_dir_ips()
+{
+ do_test_span_gre_dir_ips 10 "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+ do_test_span_gre_dir_ips 0 "$@"
+}
+
+test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ quick_test_span_gre_dir_ips "$tundev" "$direction" "$ip1" "$ip2"
+
+ icmp_capture_install h3-$tundev "type $forward_type"
+ mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 10
+ icmp_capture_uninstall h3-$tundev
+
+ icmp_capture_install h3-$tundev "type $backward_type"
+ mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 10
+ icmp_capture_uninstall h3-$tundev
+}
+
+full_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+ quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+ full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000000..fc0508e40fca
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address has invalid address at the time that the mirroring
+# is set up. Later on, the neighbor is deleted and it is expected to be
+# reinitialized using the usual ARP process, and the mirroring offload updated.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+ local addr=$1; shift
+ local tundev=$1; shift
+ local direction=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+ ip neigh del dev $swp3 $addr
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()
+{
+ test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+ test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000000..8fa681eb90e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to directly-attached route).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.161/28
+ ip address add dev $h3 192.0.2.162/28
+ ip address add dev gt4 192.0.2.129/32
+ ip address add dev h3-gt4 192.0.2.130/32
+
+ # IPv6 route can't be added after address. Such routes are rejected due
+ # to the gateway address having been configured on the local system. It
+ # works the other way around though.
+ ip address add dev $swp3 2001:db8:4::1/64
+ ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address add dev $h3 2001:db8:4::2/64
+ ip address add dev gt6 2001:db8:2::1
+ ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address del dev $h3 2001:db8:4::2/64
+ ip address del dev $swp3 2001:db8:4::1/64
+
+ ip address del dev $h3 192.0.2.162/28
+ ip address del dev $swp3 192.0.2.161/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.conf.$h3.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+ RET=0
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+ # For IPv4, test that there's no mirroring without the route directing
+ # the traffic to tunnel remote address. Then add it and test that
+ # mirroring starts. For IPv6 we can't test this due to the limitation
+ # that routes for locally-specified IPv6 addresses can't be added.
+ fail_test_span_gre_dir gt4 ingress
+
+ ip route add 192.0.2.130/32 via 192.0.2.162
+ quick_test_span_gre_dir gt4 ingress
+ ip route del 192.0.2.130/32 via 192.0.2.162
+
+ mirror_uninstall $swp1 ingress
+ log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt6 ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000000..b3ceda2b4197
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# | | : : |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | loc=2001:db8:2::2 loc=192.0.2.130 |
+# | rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+
+mirror_gre_topo_h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_gre_topo_h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_gre_topo_h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_gre_topo_h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_gre_topo_h3_create()
+{
+ simple_if_init $h3
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ simple_if_fini $h3
+}
+
+mirror_gre_topo_switch_create()
+{
+ ip link set dev $swp3 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit allow-localremote
+
+ tc qdisc add dev $swp1 clsact
+}
+
+mirror_gre_topo_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+
+ ip link set dev $swp3 down
+}
+
+mirror_gre_topo_create()
+{
+ mirror_gre_topo_h1_create
+ mirror_gre_topo_h2_create
+ mirror_gre_topo_h3_create
+
+ mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+ mirror_gre_topo_switch_destroy
+
+ mirror_gre_topo_h3_destroy
+ mirror_gre_topo_h2_destroy
+ mirror_gre_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000000..e5028a5725e3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+ local to_dev=$1; shift
+ local filter=$1; shift
+
+ tc filter add dev $from_dev $direction \
+ pref 1000 $filter \
+ action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+
+ tc filter del dev $swp1 $direction pref 1000
+}
+
+mirror_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dev=$1; shift
+ local pref=$1; shift
+ local expect=$1; shift
+
+ local t0=$(tc_rule_stats_get $dev $pref)
+ ip vrf exec $vrf_name \
+ ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ local t1=$(tc_rule_stats_get $dev $pref)
+ local delta=$((t1 - t0))
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "Expected to capture $expect packets, got $delta."
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index cc6a14abfa87..a75cb51cc5bd 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6"
NUM_NETIFS=4
source lib.sh
@@ -114,12 +115,21 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 3bc351008db6..8b6d0fb6d604 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
NUM_NETIFS=8
source lib.sh
@@ -191,7 +192,7 @@ multipath_eval()
diff=$(echo $weights_ratio - $packets_ratio | bc -l)
diff=${diff#-}
- test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
check_err $? "Too large discrepancy between expected and measured ratios"
log_test "$desc"
log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
@@ -204,13 +205,11 @@ multipath4_test()
local weight_rp13=$3
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
- local hash_policy
# Transmit multiple flows from h1 to h2 and make sure they are
# distributed between both multipath links (rp12 and rp13)
# according to the configured weights.
- hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
- sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
ip route replace 198.51.100.0/24 vrf vrf-r1 \
nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
@@ -232,7 +231,7 @@ multipath4_test()
ip route replace 198.51.100.0/24 vrf vrf-r1 \
nexthop via 169.254.2.22 dev $rp12 \
nexthop via 169.254.3.23 dev $rp13
- sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
}
multipath6_l4_test()
@@ -242,13 +241,11 @@ multipath6_l4_test()
local weight_rp13=$3
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
- local hash_policy
# Transmit multiple flows from h1 to h2 and make sure they are
# distributed between both multipath links (rp12 and rp13)
# according to the configured weights.
- hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
- sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip route replace 2001:db8:2::/64 vrf vrf-r1 \
nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
@@ -271,7 +268,7 @@ multipath6_l4_test()
nexthop via fe80:2::22 dev $rp12 \
nexthop via fe80:3::23 dev $rp13
- sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
}
multipath6_test()
@@ -364,13 +361,21 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
-multipath_test
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 3a6385ebd5d0..813d02d1939d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+ mirred_egress_mirror_test gact_trap_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -111,6 +113,10 @@ gact_trap_test()
{
RET=0
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
skip_hw dst_ip 192.0.2.2 action drop
tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
@@ -179,24 +185,29 @@ cleanup()
ip link set $swp1 address $swp1origmac
}
+mirred_egress_redirect_test()
+{
+ mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+ mirred_egress_test "mirror"
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-gact_drop_and_ok_test
-mirred_egress_test "redirect"
-mirred_egress_test "mirror"
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- gact_drop_and_ok_test
- mirred_egress_test "redirect"
- mirred_egress_test "mirror"
- gact_trap_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2fd15226974b..d2c783e94df3 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -107,16 +108,14 @@ trap cleanup EXIT
setup_prepare
setup_wait
-unreachable_chain_test
-gact_goto_chain_test
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- unreachable_chain_test
- gact_goto_chain_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882adfc0..20d1077e5a3d 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+ match_src_ip_test match_ip_flags_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -149,6 +151,74 @@ match_src_ip_test()
log_test "src_ip match ($tcflags)"
}
+match_ip_flags_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags ip_flags frag action continue
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags ip_flags firstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags ip_flags nofirstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+ $tcflags ip_flags nofrag action drop
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on wrong frag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Did not match on nofrag filter (nofrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0,mf" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on frag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match fistfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256,mf" -q
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256" -q
+
+ tc_check_packets "dev $h2 ingress" 101 3
+ check_err $? "Did not match on frag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 3
+ check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Matched on nofrag filter (no1stfrag)"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+ log_test "ip_flags match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -177,20 +247,14 @@ trap cleanup EXIT
setup_prepare
setup_wait
-match_dst_mac_test
-match_src_mac_test
-match_dst_ip_test
-match_src_ip_test
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- match_dst_mac_test
- match_src_mac_test
- match_dst_ip_test
- match_src_ip_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 077b98048ef4..b5b917203815 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="shared_block_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -109,14 +110,14 @@ trap cleanup EXIT
setup_prepare
setup_wait
-shared_block_test
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- shared_block_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 1e428781a625..7651fd4d86fe 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -368,7 +368,7 @@ test_pmtu_vti6_link_add_mtu() {
fail=0
- min=1280
+ min=68 # vti6 can carry IPv4 packets too
max=$((65535 - 40))
# Check invalid values first
for v in $((min - 1)) $((max + 1)); do
@@ -384,7 +384,7 @@ test_pmtu_vti6_link_add_mtu() {
done
# Now check valid values
- for v in 1280 1300 $((65535 - 40)); do
+ for v in 68 1280 1300 $((65535 - 40)); do
${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
${ns_a} ip link del vti6_a
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000000..d044b29ddabc
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr4->sin_port = htons(port);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_loopback;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "illegal family");
+ }
+}
+
+void *start_server(void *arg)
+{
+ int server_fd = (int)(unsigned long)arg;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ char *buf;
+ int fd;
+ int r;
+
+ buf = malloc(BUF_SIZE);
+
+ for (;;) {
+ fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ break;
+ }
+ do {
+ r = send(fd, buf, BUF_SIZE, 0);
+ } while (r < 0 && errno == EINTR);
+ if (r < 0)
+ perror("send");
+ if (r != BUF_SIZE)
+ fprintf(stderr, "can only send %d bytes\n", r);
+ /* TCP_INQ can overestimate in-queue by one byte if we send
+ * the FIN packet. Sleep for 1 second, so that the client
+ * likely invoked recvmsg().
+ */
+ sleep(1);
+ close(fd);
+ }
+
+ free(buf);
+ close(server_fd);
+ pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listen_addr, addr;
+ int c, one = 1, inq = -1;
+ pthread_t server_thread;
+ char cmsgbuf[CMSG_SIZE];
+ struct iovec iov[1];
+ struct cmsghdr *cm;
+ struct msghdr msg;
+ int server_fd, fd;
+ char *buf;
+
+ while ((c = getopt(argc, argv, "46p:")) != -1) {
+ switch (c) {
+ case '4':
+ family = PF_INET;
+ addr_len = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ family = PF_INET6;
+ addr_len = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ port = atoi(optarg);
+ break;
+ }
+ }
+
+ server_fd = socket(family, SOCK_STREAM, 0);
+ if (server_fd < 0)
+ error(1, errno, "server socket");
+ setup_loopback_addr(family, &listen_addr);
+ if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+ &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+ if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+ addr_len) == -1)
+ error(1, errno, "bind");
+ if (listen(server_fd, 128) == -1)
+ error(1, errno, "listen");
+ if (pthread_create(&server_thread, NULL, start_server,
+ (void *)(unsigned long)server_fd) != 0)
+ error(1, errno, "pthread_create");
+
+ fd = socket(family, SOCK_STREAM, 0);
+ if (fd < 0)
+ error(1, errno, "client socket");
+ setup_loopback_addr(family, &addr);
+ if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+ error(1, errno, "connect");
+ if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(TCP_INQ)");
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+ msg.msg_flags = 0;
+
+ buf = malloc(BUF_SIZE);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = BUF_SIZE / 2;
+
+ if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+ error(1, errno, "recvmsg");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, 0, "control message is truncated");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+ if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+ inq = *((int *) CMSG_DATA(cm));
+
+ if (inq != BUF_SIZE - iov[0].iov_len) {
+ fprintf(stderr, "unexpected inq: %d\n", inq);
+ exit(1);
+ }
+
+ printf("PASSED\n");
+ free(buf);
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000000..77f762780199
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ * tcp_mmap -s -z &
+ * tcp_mmap -H ::1 -z
+ *
+ * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ * (4096 : page size on x86, 12: TCP TS option length)
+ * tcp_mmap -s -z -M $((4096+12)) &
+ * tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s & # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1 # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z & # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */
+static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+ asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
+void hash_zone(void *zone, unsigned int length)
+{
+ unsigned long temp = htotal;
+
+ while (length >= 8*sizeof(long)) {
+ prefetch(zone + 384);
+ temp ^= *(unsigned long *)zone;
+ temp ^= *(unsigned long *)(zone + sizeof(long));
+ temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+ zone += 8*sizeof(long);
+ length -= 8*sizeof(long);
+ }
+ while (length >= 1) {
+ temp ^= *(unsigned char *)zone;
+ zone += 1;
+ length--;
+ }
+ htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+ unsigned long total_mmap = 0, total = 0;
+ struct tcp_zerocopy_receive zc;
+ unsigned long delta_usec;
+ int flags = MAP_SHARED;
+ struct timeval t0, t1;
+ char *buffer = NULL;
+ void *addr = NULL;
+ double throughput;
+ struct rusage ru;
+ int lu, fd;
+
+ fd = (int)(unsigned long)arg;
+
+ gettimeofday(&t0, NULL);
+
+ fcntl(fd, F_SETFL, O_NDELAY);
+ buffer = malloc(chunk_size);
+ if (!buffer) {
+ perror("malloc");
+ goto error;
+ }
+ if (zflg) {
+ addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+ if (addr == (void *)-1)
+ zflg = 0;
+ }
+ while (1) {
+ struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+ int sub;
+
+ poll(&pfd, 1, 10000);
+ if (zflg) {
+ socklen_t zc_len = sizeof(zc);
+ int res;
+
+ zc.address = (__u64)addr;
+ zc.length = chunk_size;
+ zc.recv_skip_hint = 0;
+ res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+ &zc, &zc_len);
+ if (res == -1)
+ break;
+
+ if (zc.length) {
+ assert(zc.length <= chunk_size);
+ total_mmap += zc.length;
+ if (xflg)
+ hash_zone(addr, zc.length);
+ total += zc.length;
+ }
+ if (zc.recv_skip_hint) {
+ assert(zc.recv_skip_hint <= chunk_size);
+ lu = read(fd, buffer, zc.recv_skip_hint);
+ if (lu > 0) {
+ if (xflg)
+ hash_zone(buffer, lu);
+ total += lu;
+ }
+ }
+ continue;
+ }
+ sub = 0;
+ while (sub < chunk_size) {
+ lu = read(fd, buffer + sub, chunk_size - sub);
+ if (lu == 0)
+ goto end;
+ if (lu < 0)
+ break;
+ if (xflg)
+ hash_zone(buffer + sub, lu);
+ total += lu;
+ sub += lu;
+ }
+ }
+end:
+ gettimeofday(&t1, NULL);
+ delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+ throughput = 0;
+ if (delta_usec)
+ throughput = total * 8.0 / (double)delta_usec / 1000.0;
+ getrusage(RUSAGE_THREAD, &ru);
+ if (total > 1024*1024) {
+ unsigned long total_usec;
+ unsigned long mb = total >> 20;
+ total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+ 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+ printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+ " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+ total / (1024.0 * 1024.0),
+ 100.0*total_mmap/total,
+ (double)delta_usec / 1000000.0,
+ throughput,
+ (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+ (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+ (double)total_usec/mb,
+ ru.ru_nvcsw);
+ }
+error:
+ free(buffer);
+ close(fd);
+ if (zflg)
+ munmap(addr, chunk_size);
+ pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+ if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+ SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+ perror("setsockopt SO_RCVBUF");
+ }
+
+ if (sndbuf && setsockopt(fd, SOL_SOCKET,
+ SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+ perror("setsockopt SO_SNDBUF");
+ }
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void do_accept(int fdlisten)
+{
+ if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+ &chunk_size, sizeof(chunk_size)) == -1) {
+ perror("setsockopt SO_RCVLOWAT");
+ }
+
+ apply_rcvsnd_buf(fdlisten);
+
+ while (1) {
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ pthread_t th;
+ int fd, res;
+
+ fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ continue;
+ }
+ res = pthread_create(&th, NULL, child_thread,
+ (void *)(unsigned long)fd);
+ if (res) {
+ errno = res;
+ perror("pthread_create");
+ close(fd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listenaddr, addr;
+ unsigned int max_pacing_rate = 0;
+ unsigned long total = 0;
+ char *host = NULL;
+ int fd, c, on = 1;
+ char *buffer;
+ int sflg = 0;
+ int mss = 0;
+
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ cfg_port = atoi(optarg);
+ break;
+ case 'H':
+ host = optarg;
+ break;
+ case 's': /* server : listen for incoming connections */
+ sflg++;
+ break;
+ case 'r':
+ rcvbuf = atoi(optarg);
+ break;
+ case 'w':
+ sndbuf = atoi(optarg);
+ break;
+ case 'z':
+ zflg = 1;
+ break;
+ case 'M':
+ mss = atoi(optarg);
+ break;
+ case 'x':
+ xflg = 1;
+ break;
+ case 'k':
+ keepflag = 1;
+ break;
+ case 'P':
+ max_pacing_rate = atoi(optarg) ;
+ break;
+ default:
+ exit(1);
+ }
+ }
+ if (sflg) {
+ int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+ if (fdlisten == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fdlisten);
+ setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+ setup_sockaddr(cfg_family, host, &listenaddr);
+
+ if (mss &&
+ setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+ &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+ perror("bind");
+ exit(1);
+ }
+ if (listen(fdlisten, 128) == -1) {
+ perror("listen");
+ exit(1);
+ }
+ do_accept(fdlisten);
+ }
+ buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer == (char *)-1) {
+ perror("mmap");
+ exit(1);
+ }
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fd);
+
+ setup_sockaddr(cfg_family, host, &addr);
+
+ if (mss &&
+ setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+ perror("connect");
+ exit(1);
+ }
+ if (max_pacing_rate &&
+ setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+ &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+ perror("setsockopt SO_MAX_PACING_RATE");
+
+ if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+ &on, sizeof(on)) == -1) {
+ perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+ zflg = 0;
+ }
+ while (total < FILE_SZ) {
+ long wr = FILE_SZ - total;
+
+ if (wr > chunk_size)
+ wr = chunk_size;
+ /* Note : we just want to fill the pipe with 0 bytes */
+ wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+ if (wr <= 0)
+ break;
+ total += wr;
+ }
+ close(fd);
+ munmap(buffer, chunk_size);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000000..48a0592db938
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#define CONST_MTU_TEST 1500
+
+#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static bool cfg_do_connected;
+static bool cfg_do_connectionless;
+static bool cfg_do_msgmore;
+static bool cfg_do_setsockopt;
+static int cfg_specific_test_id = -1;
+
+static const char cfg_ifname[] = "lo";
+static unsigned short cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+ int tlen; /* send() buffer size, may exceed mss */
+ bool tfail; /* send() call is expected to fail */
+ int gso_len; /* mss after applying gso */
+ int r_num_mss; /* recv(): number of calls of full mss */
+ int r_len_last; /* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fail with GSO, because the segment
+ * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+ */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .tfail = true,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V4 * 2,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V4 * 2) + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = CONST_MAX_SEGS_V4,
+ .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+ (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fail with GSO, because the segment
+ * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+ */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .tfail = true,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V6 * 2,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V6 * 2) + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = CONST_MAX_SEGS_V6,
+ .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+ (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFMTU, &ifr))
+ error(1, errno, "ioctl get mtu");
+
+ return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_mtu = mtu;
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCSIFMTU, &ifr))
+ error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+ int val;
+
+ val = get_device_mtu(fd, cfg_ifname);
+ fprintf(stderr, "device mtu (orig): %u\n", val);
+
+ __set_device_mtu(fd, cfg_ifname, mtu);
+ val = get_device_mtu(fd, cfg_ifname);
+ if (val != mtu)
+ error(1, 0, "unable to set device mtu to %u\n", val);
+
+ fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+ socklen_t vallen;
+ unsigned int mtu;
+ int ret;
+
+ vallen = sizeof(mtu);
+ if (is_ipv4)
+ ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+ else
+ ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+ if (ret)
+ error(1, errno, "getsockopt mtu");
+
+
+ fprintf(stderr, "path mtu (read): %u\n", mtu);
+ return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rt;
+ char data[NLMSG_ALIGN(sizeof(*nh)) +
+ NLMSG_ALIGN(sizeof(*rt)) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+ NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+ int fd, ret, alen, off = 0;
+
+ alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ memset(data, 0, sizeof(data));
+
+ nh = (void *)data;
+ nh->nlmsg_type = RTM_NEWROUTE;
+ nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ rt = (void *)(data + off);
+ rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+ rt->rtm_table = RT_TABLE_MAIN;
+ rt->rtm_dst_len = alen << 3;
+ rt->rtm_protocol = RTPROT_BOOT;
+ rt->rtm_scope = RT_SCOPE_UNIVERSE;
+ rt->rtm_type = RTN_UNICAST;
+ off += NLMSG_ALIGN(sizeof(*rt));
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_DST;
+ rta->rta_len = RTA_LENGTH(alen);
+ if (is_ipv4)
+ memcpy(RTA_DATA(rta), &addr4, alen);
+ else
+ memcpy(RTA_DATA(rta), &addr6, alen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_OIF;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* MTU is a subtype in a metrics type */
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_METRICS;
+ rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* now fill MTU subtype. Note that it fits within above rta_len */
+ rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+ rta->rta_type = RTAX_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = mtu;
+
+ nh->nlmsg_len = off;
+
+ ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != off)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+ if (close(fd))
+ error(1, errno, "close netlink");
+
+ fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+ int ret;
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM))
+ return false;
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != msg->msg_iov->iov_len)
+ error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ if (msg->msg_flags)
+ error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+ return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+ struct sockaddr *addr, socklen_t alen)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+
+ iov.iov_base = buf;
+ iov.iov_len = len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
+
+ if (gso_len && !cfg_do_setsockopt) {
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cm = CMSG_FIRSTHDR(&msg);
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *((uint16_t *) CMSG_DATA(cm)) = gso_len;
+ }
+
+ /* If MSG_MORE, send 1 byte followed by remainder */
+ if (cfg_do_msgmore && len > 1) {
+ iov.iov_len = 1;
+ if (!__send_one(fd, &msg, MSG_MORE))
+ error(1, 0, "send 1B failed");
+
+ iov.iov_base++;
+ iov.iov_len = len - 1;
+ }
+
+ return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+ int ret;
+
+ ret = recv(fd, buf, sizeof(buf), flags);
+ if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+ return 0;
+ if (ret == -1)
+ error(1, errno, "recv");
+
+ return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+ struct sockaddr *addr, socklen_t alen)
+{
+ int i, ret, val, mss;
+ bool sent;
+
+ fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+ addr->sa_family == AF_INET ? 4 : 6,
+ test->tlen, test->gso_len,
+ test->tfail ? "(fail)" : "");
+
+ val = test->gso_len;
+ if (cfg_do_setsockopt) {
+ if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+ error(1, errno, "setsockopt udp segment");
+ }
+
+ sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+ if (sent && test->tfail)
+ error(1, 0, "send succeeded while expecting failure");
+ if (!sent && !test->tfail)
+ error(1, 0, "send failed while expecting success");
+ if (!sent)
+ return;
+
+ mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+ /* Recv all full MSS datagrams */
+ for (i = 0; i < test->r_num_mss; i++) {
+ ret = recv_one(fdr, 0);
+ if (ret != mss)
+ error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+ }
+
+ /* Recv the non-full last datagram, if tlen was not a multiple of mss */
+ if (test->r_len_last) {
+ ret = recv_one(fdr, 0);
+ if (ret != test->r_len_last)
+ error(1, 0, "recv.%d: %d != %d (last)",
+ i, ret, test->r_len_last);
+ }
+
+ /* Verify received all data */
+ ret = recv_one(fdr, MSG_DONTWAIT);
+ if (ret)
+ error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+ struct testcase *tests, *test;
+
+ tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+ for (test = tests; test->tlen; test++) {
+ /* if a specific test is given, then skip all others */
+ if (cfg_specific_test_id == -1 ||
+ cfg_specific_test_id == test - tests)
+ run_one(test, fdt, fdr, addr, alen);
+ }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fdr, fdt, val;
+
+ fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdr == -1)
+ error(1, errno, "socket r");
+
+ if (bind(fdr, addr, alen))
+ error(1, errno, "bind");
+
+ /* Have tests fail quickly instead of hang */
+ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdt == -1)
+ error(1, errno, "socket t");
+
+ /* Do not fragment these datagrams: only succeed if GSO works */
+ set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+ if (cfg_do_connectionless) {
+ set_device_mtu(fdt, CONST_MTU_TEST);
+ run_all(fdt, fdr, addr, alen);
+ }
+
+ if (cfg_do_connected) {
+ set_device_mtu(fdt, CONST_MTU_TEST + 100);
+ set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+ if (connect(fdt, addr, alen))
+ error(1, errno, "connect");
+
+ val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+ if (val != CONST_MTU_TEST)
+ error(1, 0, "bad path mtu %u\n", val);
+
+ run_all(fdt, fdr, addr, 0 /* use connected addr */);
+ }
+
+ if (close(fdt))
+ error(1, errno, "close t");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+ struct sockaddr_in addr = {0};
+
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(cfg_port);
+ addr.sin_addr = addr4;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+ struct sockaddr_in6 addr = {0};
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = addr6;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_do_ipv4 = true;
+ break;
+ case '6':
+ cfg_do_ipv6 = true;
+ break;
+ case 'c':
+ cfg_do_connected = true;
+ break;
+ case 'C':
+ cfg_do_connectionless = true;
+ break;
+ case 'm':
+ cfg_do_msgmore = true;
+ break;
+ case 's':
+ cfg_do_setsockopt = true;
+ break;
+ case 't':
+ cfg_specific_test_id = strtoul(optarg, NULL, 0);
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_do_ipv4)
+ run_test_v4();
+ if (cfg_do_ipv6)
+ run_test_v6();
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000000..fec24f584fe9
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000000..792fa4d0285e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+ local -r jobs="$(jobs -p)"
+
+ if [[ "${jobs}" != "" ]]; then
+ kill -1 ${jobs} 2>/dev/null
+ fi
+}
+trap wake_children EXIT
+
+run_one() {
+ local -r args=$@
+
+ ./udpgso_bench_rx &
+ ./udpgso_bench_rx -t &
+
+ ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+ local -r args=$@
+
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp"
+ run_in_netns ${args}
+
+ echo "udp gso"
+ run_in_netns ${args} -S
+
+ echo "udp gso zerocopy"
+ run_in_netns ${args} -S -z
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp"
+ run_in_netns ${args} -t
+
+ echo "tcp zerocopy"
+ run_in_netns ${args} -t -z
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+ local -r ipv6_args="${core_args} -6 -D ::1"
+
+ echo "ipv4"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv4_args}"
+
+ echo "ipv6"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000000..727cf67a3f75
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int cfg_port = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ do {
+ ret = poll(&pfd, 1, 10);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret == 0)
+ continue;
+ if (pfd.revents != POLLIN)
+ error(1, errno, "poll: 0x%x expected 0x%x\n",
+ pfd.revents, POLLIN);
+ } while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+ struct sockaddr_in6 addr = {0};
+ int fd, val;
+
+ fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ val = 1 << 21;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+ error(1, errno, "setsockopt reuseport");
+
+ addr.sin6_family = PF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = in6addr_any;
+ if (bind(fd, (void *) &addr, sizeof(addr)))
+ error(1, errno, "bind");
+
+ if (do_tcp) {
+ int accept_fd = fd;
+
+ if (listen(accept_fd, 1))
+ error(1, errno, "listen");
+
+ do_poll(accept_fd);
+
+ fd = accept(accept_fd, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(accept_fd))
+ error(1, errno, "close accept fd");
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ while (true) {
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (ret == 0) {
+ /* client detached */
+ exit(0);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+
+}
+
+static char sanitized_char(char val)
+{
+ return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+ char cur = data[0];
+ int i;
+
+ /* verify contents */
+ if (cur < 'a' || cur > 'z')
+ error(1, 0, "data initial byte out of range");
+
+ for (i = 1; i < len; i++) {
+ if (cur == 'z')
+ cur = 'a';
+ else
+ cur++;
+
+ if (data[i] != cur)
+ error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+ i, len,
+ sanitized_char(data[i]), data[i],
+ sanitized_char(cur), cur);
+ }
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+ static char rbuf[ETH_DATA_LEN];
+ int ret, len, budget = 256;
+
+ len = cfg_verify ? sizeof(rbuf) : 0;
+ while (budget--) {
+ /* MSG_TRUNC will make return value full datagram length */
+ ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (len) {
+ if (ret == 0)
+ error(1, errno, "recv: 0 byte datagram\n");
+
+ do_verify_udp(rbuf, ret);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "ptv")) != -1) {
+ switch (c) {
+ case 'p':
+ cfg_port = htons(strtoul(optarg, NULL, 0));
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'v':
+ cfg_verify = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_tcp && cfg_verify)
+ error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+ unsigned long tnow, treport;
+ int fd;
+
+ fd = do_socket(cfg_tcp);
+
+ treport = gettimeofday_ms() + 1000;
+ do {
+ do_poll(fd);
+
+ if (cfg_tcp)
+ do_flush_tcp(fd);
+ else
+ do_flush_udp(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ if (packets)
+ fprintf(stderr,
+ "%s rx: %6lu MB/s %8lu calls/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ bytes >> 20, packets);
+ bytes = packets = 0;
+ treport = tnow + 1000;
+ }
+
+ } while (!interrupted);
+
+ if (close(fd))
+ error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ signal(SIGINT, sigint_handler);
+
+ do_recv();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000000..e821564053cf
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define NUM_PKT 100
+
+static bool cfg_cache_trash;
+static int cfg_cpu = -1;
+static int cfg_connected = true;
+static int cfg_family = PF_UNSPEC;
+static uint16_t cfg_mss;
+static int cfg_payload_len = (1472 * 42);
+static int cfg_port = 8000;
+static int cfg_runtime_ms = -1;
+static bool cfg_segment;
+static bool cfg_sendmmsg;
+static bool cfg_tcp;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ error(1, 0, "setaffinity %d", cpu);
+
+ return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void flush_zerocopy(int fd)
+{
+ struct msghdr msg = {0}; /* flush */
+ int ret;
+
+ while (1) {
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+ msg.msg_flags = 0;
+ }
+}
+
+static int send_tcp(int fd, char *data)
+{
+ int ret, done = 0, count = 0;
+
+ while (done < cfg_payload_len) {
+ ret = send(fd, data + done, cfg_payload_len - done,
+ cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "write");
+
+ done += ret;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+ int ret, total_len, len, count = 0;
+
+ total_len = cfg_payload_len;
+
+ while (total_len) {
+ len = total_len < cfg_mss ? total_len : cfg_mss;
+
+ ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+ cfg_connected ? NULL : (void *)&cfg_dst_addr,
+ cfg_connected ? 0 : cfg_alen);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, errno, "write: %uB != %uB\n", ret, len);
+
+ total_len -= len;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+ const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+ struct mmsghdr mmsgs[max_nr_msg];
+ struct iovec iov[max_nr_msg];
+ unsigned int off = 0, left;
+ int i = 0, ret;
+
+ memset(mmsgs, 0, sizeof(mmsgs));
+
+ left = cfg_payload_len;
+ while (left) {
+ if (i == max_nr_msg)
+ error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+ iov[i].iov_base = data + off;
+ iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+ mmsgs[i].msg_hdr.msg_iov = iov + i;
+ mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+ off += iov[i].iov_len;
+ left -= iov[i].iov_len;
+ i++;
+ }
+
+ ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmmsg");
+
+ return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+ uint16_t *valp;
+
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+ valp = (void *)CMSG_DATA(cm);
+ *valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+ char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = cfg_payload_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+ msg.msg_name = (void *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
+
+ ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != iov.iov_len)
+ error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+ return 1;
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int max_len, hdrlen;
+ int c;
+
+ while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cache_trash = true;
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ break;
+ case 'l':
+ cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'm':
+ cfg_sendmmsg = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ cfg_segment = true;
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'u':
+ cfg_connected = false;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_family == PF_UNSPEC)
+ error(1, 0, "must pass one of -4 or -6");
+ if (cfg_tcp && !cfg_connected)
+ error(1, 0, "connectionless tcp makes no sense");
+ if (cfg_segment && cfg_sendmmsg)
+ error(1, 0, "cannot combine segment offload and sendmmsg");
+
+ if (cfg_family == PF_INET)
+ hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+ else
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+ cfg_mss = ETH_DATA_LEN - hdrlen;
+ max_len = ETH_MAX_MTU - hdrlen;
+
+ if (cfg_payload_len > max_len)
+ error(1, 0, "payload length %u exceeds max %u",
+ cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long num_msgs, num_sends;
+ unsigned long tnow, treport, tstop;
+ int fd, i, val;
+
+ parse_opts(argc, argv);
+
+ if (cfg_cpu > 0)
+ set_cpu(cfg_cpu);
+
+ for (i = 0; i < sizeof(buf[0]); i++)
+ buf[0][i] = 'a' + (i % 26);
+ for (i = 1; i < NUM_PKT; i++)
+ memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+ signal(SIGINT, sigint_handler);
+
+ fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ if (cfg_zerocopy) {
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+ error(1, errno, "setsockopt zerocopy");
+ }
+
+ if (cfg_connected &&
+ connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (cfg_segment)
+ set_pmtu_discover(fd, cfg_family == PF_INET);
+
+ num_msgs = num_sends = 0;
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_runtime_ms;
+ treport = tnow + 1000;
+
+ i = 0;
+ do {
+ if (cfg_tcp)
+ num_sends += send_tcp(fd, buf[i]);
+ else if (cfg_segment)
+ num_sends += send_udp_segment(fd, buf[i]);
+ else if (cfg_sendmmsg)
+ num_sends += send_udp_sendmmsg(fd, buf[i]);
+ else
+ num_sends += send_udp(fd, buf[i]);
+ num_msgs++;
+
+ if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+ flush_zerocopy(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ fprintf(stderr,
+ "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ (num_msgs * cfg_payload_len) >> 20,
+ num_sends, num_msgs);
+ num_msgs = num_sends = 0;
+ treport = tnow + 1000;
+ }
+
+ /* cold cache when writing buffer */
+ if (cfg_cache_trash)
+ i = ++i < NUM_PKT ? i : 0;
+
+ } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index 93cf8fea8ae7..3a2f51fc7fd4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -398,13 +398,83 @@
255
]
],
- "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
- "expExitCode": "255",
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
"verifyCmd": "$TC actions ls action csum",
"matchPattern": "^[ \t]+index [0-9]* ref",
"matchCount": "32",
"teardown": [
"$TC actions flush action csum"
]
+ },
+ {
+ "id": "b4e9",
+ "name": "Delete batch of 32 csum actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "0015",
+ "name": "Add batch of 32 csum tcp actions with large cookies",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "989e",
+ "name": "Delete batch of 32 csum actions with large cookies",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index 9f34f0753969..03777730ef29 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1,7 +1,7 @@
[
{
- "id": "a568",
- "name": "Add action with ife type",
+ "id": "7682",
+ "name": "Create valid ife encode action with mark and pass control",
"category": [
"actions",
"ife"
@@ -12,21 +12,710 @@
0,
1,
255
- ],
- "$TC actions add action ife encode type 0xDEAD index 1"
+ ]
],
- "cmdUnderTest": "$TC actions get action ife index 1",
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ef47",
+ "name": "Create valid ife encode action with mark and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "df43",
+ "name": "Create valid ife encode action with mark and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "e4cf",
+ "name": "Create valid ife encode action with mark and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ccba",
+ "name": "Create valid ife encode action with mark and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "a1cf",
+ "name": "Create valid ife encode action with mark and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cb3d",
+ "name": "Create valid ife encode action with mark value at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "1efb",
+ "name": "Create ife encode action with mark value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "95ed",
+ "name": "Create valid ife encode action with prio and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "aa17",
+ "name": "Create valid ife encode action with prio and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "74c7",
+ "name": "Create valid ife encode action with prio and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "7a97",
+ "name": "Create valid ife encode action with prio and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "f66b",
+ "name": "Create valid ife encode action with prio and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "3056",
+ "name": "Create valid ife encode action with prio and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "7dd3",
+ "name": "Create valid ife encode action with prio value at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 99",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "2ca1",
+ "name": "Create ife encode action with prio value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 99",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "05bb",
+ "name": "Create valid ife encode action with tcindex and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ce65",
+ "name": "Create valid ife encode action with tcindex and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "09cd",
+ "name": "Create valid ife encode action with tcindex and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "8eb5",
+ "name": "Create valid ife encode action with tcindex and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "451a",
+ "name": "Create valid ife encode action with tcindex and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d76c",
+ "name": "Create valid ife encode action with tcindex and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "e731",
+ "name": "Create valid ife encode action with tcindex and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "b7b8",
+ "name": "Create valid ife encode action with tcindex value at 16-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "d0d8",
+ "name": "Create ife encode action with tcindex value exceeding 16-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2a9c",
+ "name": "Create valid ife encode action with mac src parameter",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "cf5c",
+ "name": "Create valid ife encode action with mac dst parameter",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "2353",
+ "name": "Create valid ife encode action with mac src and mac dst parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 11",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "552c",
+ "name": "Create valid ife encode action with mark and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action ife index 1",
- "matchPattern": "type 0xDEAD",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "0421",
+ "name": "Create valid ife encode action with prio and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 21",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21",
"matchCount": "1",
"teardown": [
"$TC actions flush action ife"
]
},
{
- "id": "b983",
- "name": "Add action without ife type",
+ "id": "4017",
+ "name": "Create valid ife encode action with tcindex and type parameters",
"category": [
"actions",
"ife"
@@ -37,16 +726,339 @@
0,
1,
255
- ],
- "$TC actions add action ife encode index 1"
+ ]
],
- "cmdUnderTest": "$TC actions get action ife index 1",
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 21",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "fac3",
+ "name": "Create valid ife encode action with index at 32-bit maximnum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 4294967295",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7c25",
+ "name": "Create valid ife decode action with pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode pass index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action ife index 1",
- "matchPattern": "type 0xED3E",
+ "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
"matchCount": "1",
"teardown": [
"$TC actions flush action ife"
]
+ },
+ {
+ "id": "dccb",
+ "name": "Create valid ife decode action with pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7bb9",
+ "name": "Create valid ife decode action with continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d9ad",
+ "name": "Create valid ife decode action with drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "219f",
+ "name": "Create valid ife decode action with reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "8f44",
+ "name": "Create valid ife decode action with jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "56cf",
+ "name": "Create ife encode action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4294967295999",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ee94",
+ "name": "Create ife encode action with invalid control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "b330",
+ "name": "Create ife encode action with cookie",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "bbc0",
+ "name": "Create ife encode action with invalid argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "d54a",
+ "name": "Create ife encode action with invalid type argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "7ee0",
+ "name": "Create ife encode action with invalid mac src argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "0a7d",
+ "name": "Create ife encode action with invalid mac dst argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000000..3aca33c00039
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,588 @@
+[
+ {
+ "id": "9784",
+ "name": "Add valid sample action with mandatory arguments",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "5c91",
+ "name": "Add valid sample action with mandatory arguments and continue control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "334b",
+ "name": "Add valid sample action with mandatory arguments and drop control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "da69",
+ "name": "Add valid sample action with mandatory arguments and reclassify control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "13ce",
+ "name": "Add valid sample action with mandatory arguments and pipe control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "1886",
+ "name": "Add valid sample action with mandatory arguments and jump control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 200",
+ "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "b6d4",
+ "name": "Add sample action with mandatory arguments and invalid control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a874",
+ "name": "Add invalid sample action without mandatory arguments",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ac01",
+ "name": "Add invalid sample action without mandatory argument rate",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "4203",
+ "name": "Add invalid sample action without mandatory argument group",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "14a7",
+ "name": "Add invalid sample action without mandatory argument group",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "8f2e",
+ "name": "Add valid sample action with trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "45f8",
+ "name": "Add sample action with maximum rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "ad0c",
+ "name": "Add sample action with maximum trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "83a9",
+ "name": "Add sample action with maximum group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 1",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "ed27",
+ "name": "Add sample action with invalid rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2eae",
+ "name": "Add sample action with invalid group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 1",
+ "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "6ff3",
+ "name": "Add sample action with invalid trunc size",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 11",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2b2a",
+ "name": "Add sample action with invalid index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 5294967299",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "dee2",
+ "name": "Add sample action with maximum allowed index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 4294967295",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "560e",
+ "name": "Add sample action with cookie",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 45",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "704a",
+ "name": "Replace existing sample action with new rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "60eb",
+ "name": "Replace existing sample action with new group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "2cce",
+ "name": "Replace existing sample action with new trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "59d1",
+ "name": "Replace existing sample action with new control argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ }
+]