summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_devel_QA.txt519
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt5
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl-flexcan.txt6
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fec.txt4
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/adf7242.txt2
-rw-r--r--Documentation/devicetree/bindings/net/phy.txt12
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt10
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/dsa/dsa.txt5
-rw-r--r--Documentation/networking/ieee802154.txt40
-rw-r--r--Documentation/networking/ip-sysctl.txt1
-rw-r--r--Documentation/networking/kapi.rst24
-rw-r--r--Documentation/networking/xfrm_device.txt132
-rw-r--r--MAINTAINERS16
-rw-r--r--arch/arm/boot/dts/imx25.dtsi4
-rw-r--r--arch/arm/boot/dts/imx28.dtsi4
-rw-r--r--arch/arm/boot/dts/imx35.dtsi4
-rw-r--r--arch/arm/boot/dts/imx53.dtsi4
-rw-r--r--arch/arm/boot/dts/ls1021a-qds.dts16
-rw-r--r--arch/arm/boot/dts/ls1021a-twr.dts16
-rw-r--r--arch/arm/boot/dts/ls1021a.dtsi36
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010si-post.dtsi2
-rw-r--r--drivers/atm/eni.c18
-rw-r--r--drivers/hv/ring_buffer.c23
-rw-r--r--drivers/net/Kconfig11
-rw-r--r--drivers/net/Makefile1
-rw-r--r--drivers/net/can/c_can/c_can_pci.c4
-rw-r--r--drivers/net/can/flexcan.c241
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb.c9
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb_core.c53
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb_core.h5
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb_pro.c9
-rw-r--r--drivers/net/can/vxcan.c16
-rw-r--r--drivers/net/dsa/Kconfig2
-rw-r--r--drivers/net/dsa/b53/b53_common.c10
-rw-r--r--drivers/net/dsa/b53/b53_priv.h7
-rw-r--r--drivers/net/dsa/bcm_sf2.c11
-rw-r--r--drivers/net/dsa/dsa_loop.c9
-rw-r--r--drivers/net/dsa/lan9303-core.c79
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c12
-rw-r--r--drivers/net/dsa/mt7530.c288
-rw-r--r--drivers/net/dsa/mt7530.h83
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c76
-rw-r--r--drivers/net/dummy.c215
-rw-r--r--drivers/net/ethernet/alteon/acenic.c14
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c11
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c17
-rw-r--r--drivers/net/ethernet/cadence/macb.h167
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c699
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c115
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c11
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h76
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c822
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h18
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h23
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c39
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c278
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c97
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c19
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c70
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.h14
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_regs.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h1
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c12
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c1
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c20
-rw-r--r--drivers/net/ethernet/hisilicon/Kconfig28
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/Makefile7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h88
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.c14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c (renamed from drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c)2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c (renamed from drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c)2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h (renamed from drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h)0
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c (renamed from drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c)22
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c409
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h29
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c410
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile9
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c342
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h256
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c1490
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h164
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c181
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c6
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c5
-rw-r--r--drivers/net/ethernet/netronome/nfp/Makefile1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c487
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c7
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h35
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c23
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c8
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c4
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h19
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c79
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c13
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.c7
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h17
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.h29
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c4
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c808
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c109
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c9
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c10
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c72
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h1
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c27
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c4
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c4
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c64
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h1
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c42
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h9
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h10
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c3
-rw-r--r--drivers/net/ethernet/realtek/r8169.c87
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c54
-rw-r--r--drivers/net/ethernet/ti/cpsw.c96
-rw-r--r--drivers/net/ethernet/ti/cpsw.h23
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c109
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.h1
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.c2
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c20
-rw-r--r--drivers/net/ethernet/ti/netcp_ethss.c6
-rw-r--r--drivers/net/hyperv/hyperv_net.h60
-rw-r--r--drivers/net/hyperv/netvsc.c73
-rw-r--r--drivers/net/hyperv/netvsc_drv.c111
-rw-r--r--drivers/net/hyperv/rndis_filter.c33
-rw-r--r--drivers/net/ieee802154/adf7242.c90
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c17
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c33
-rw-r--r--drivers/net/netdevsim/Makefile7
-rw-r--r--drivers/net/netdevsim/bpf.c373
-rw-r--r--drivers/net/netdevsim/netdev.c502
-rw-r--r--drivers/net/netdevsim/netdevsim.h78
-rw-r--r--drivers/net/phy/amd.c2
-rw-r--r--drivers/net/phy/at803x.c24
-rw-r--r--drivers/net/phy/bcm-cygnus.c2
-rw-r--r--drivers/net/phy/bcm63xx.c4
-rw-r--r--drivers/net/phy/bcm7xxx.c6
-rw-r--r--drivers/net/phy/broadcom.c69
-rw-r--r--drivers/net/phy/cicada.c4
-rw-r--r--drivers/net/phy/davicom.c5
-rw-r--r--drivers/net/phy/dp83640.c2
-rw-r--r--drivers/net/phy/dp83822.c2
-rw-r--r--drivers/net/phy/dp83848.c2
-rw-r--r--drivers/net/phy/dp83867.c2
-rw-r--r--drivers/net/phy/icplus.c4
-rw-r--r--drivers/net/phy/intel-xway.c12
-rw-r--r--drivers/net/phy/lxt.c5
-rw-r--r--drivers/net/phy/marvell.c9
-rw-r--r--drivers/net/phy/marvell10g.c1
-rw-r--r--drivers/net/phy/mdio_bus.c21
-rw-r--r--drivers/net/phy/mdio_device.c34
-rw-r--r--drivers/net/phy/meson-gxl.c1
-rw-r--r--drivers/net/phy/micrel.c24
-rw-r--r--drivers/net/phy/microchip.c1
-rw-r--r--drivers/net/phy/national.c2
-rw-r--r--drivers/net/phy/phy.c46
-rw-r--r--drivers/net/phy/phy_device.c58
-rw-r--r--drivers/net/phy/phylink.c398
-rw-r--r--drivers/net/phy/qsemi.c2
-rw-r--r--drivers/net/phy/realtek.c128
-rw-r--r--drivers/net/phy/rockchip.c1
-rw-r--r--drivers/net/phy/sfp-bus.c120
-rw-r--r--drivers/net/phy/sfp.c78
-rw-r--r--drivers/net/phy/smsc.c11
-rw-r--r--drivers/net/phy/ste10Xp.c4
-rw-r--r--drivers/net/phy/uPD60620.c1
-rw-r--r--drivers/net/phy/vitesse.c12
-rw-r--r--drivers/net/slip/slip.c4
-rw-r--r--drivers/net/tun.c148
-rw-r--r--drivers/net/usb/qmi_wwan.c4
-rw-r--r--drivers/net/veth.c3
-rw-r--r--drivers/net/virtio_net.c9
-rw-r--r--drivers/net/vmxnet3/vmxnet3_int.h8
-rw-r--r--drivers/of/of_mdio.c4
-rw-r--r--drivers/pci/pci.c25
-rw-r--r--fs/gfs2/glock.c7
-rw-r--r--include/linux/brcmphy.h1
-rw-r--r--include/linux/dsa/lan9303.h1
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/hyperv.h22
-rw-r--r--include/linux/if_macvlan.h2
-rw-r--r--include/linux/mdio.h5
-rw-r--r--include/linux/netdevice.h5
-rw-r--r--include/linux/pci.h1
-rw-r--r--include/linux/phy.h51
-rw-r--r--include/linux/phylink.h201
-rw-r--r--include/linux/rhashtable.h38
-rw-r--r--include/linux/sctp.h37
-rw-r--r--include/linux/sfp.h58
-rw-r--r--include/linux/skb_array.h5
-rw-r--r--include/linux/skbuff.h5
-rw-r--r--include/linux/spinlock.h6
-rw-r--r--include/linux/tcp.h2
-rw-r--r--include/net/act_api.h15
-rw-r--r--include/net/addrconf.h2
-rw-r--r--include/net/dn_route.h1
-rw-r--r--include/net/dsa.h61
-rw-r--r--include/net/dst.h39
-rw-r--r--include/net/erspan.h193
-rw-r--r--include/net/gen_stats.h3
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/inet_hashtables.h29
-rw-r--r--include/net/inet_timewait_sock.h4
-rw-r--r--include/net/ip.h9
-rw-r--r--include/net/ip6_fib.h14
-rw-r--r--include/net/ip6_tunnel.h4
-rw-r--r--include/net/ip_tunnels.h5
-rw-r--r--include/net/ipv6.h17
-rw-r--r--include/net/netns/sctp.h5
-rw-r--r--include/net/pkt_sched.h10
-rw-r--r--include/net/rtnetlink.h4
-rw-r--r--include/net/sch_generic.h90
-rw-r--r--include/net/sctp/constants.h9
-rw-r--r--include/net/sctp/sctp.h6
-rw-r--r--include/net/sctp/sm.h18
-rw-r--r--include/net/sctp/stream_interleave.h61
-rw-r--r--include/net/sctp/structs.h68
-rw-r--r--include/net/sctp/ulpevent.h23
-rw-r--r--include/net/sctp/ulpqueue.h10
-rw-r--r--include/net/sock.h11
-rw-r--r--include/net/tc_act/tc_mirred.h6
-rw-r--r--include/net/tcp.h11
-rw-r--r--include/net/xfrm.h38
-rw-r--r--include/trace/events/bridge.h4
-rw-r--r--include/uapi/linux/bpf.h6
-rw-r--r--include/uapi/linux/ethtool.h1
-rw-r--r--include/uapi/linux/if_ether.h1
-rw-r--r--include/uapi/linux/if_tun.h1
-rw-r--r--include/uapi/linux/if_tunnel.h3
-rw-r--r--include/uapi/linux/sctp.h3
-rw-r--r--kernel/bpf/verifier.c62
-rw-r--r--lib/Makefile2
-rw-r--r--lib/bucket_locks.c54
-rw-r--r--lib/rhashtable.c160
-rw-r--r--lib/test_rhashtable.c6
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/mpc.c9
-rw-r--r--net/atm/mpoa_caches.c48
-rw-r--r--net/atm/mpoa_caches.h9
-rw-r--r--net/atm/mpoa_proc.c15
-rw-r--r--net/bridge/br_device.c10
-rw-r--r--net/bridge/br_fdb.c392
-rw-r--r--net/bridge/br_mdb.c6
-rw-r--r--net/bridge/br_nf_core.c1
-rw-r--r--net/bridge/br_private.h16
-rw-r--r--net/bridge/br_switchdev.c8
-rw-r--r--net/can/gw.c14
-rw-r--r--net/core/dev.c86
-rw-r--r--net/core/dst.c14
-rw-r--r--net/core/filter.c40
-rw-r--r--net/core/flow_dissector.c12
-rw-r--r--net/core/gen_stats.c9
-rw-r--r--net/core/pktgen.c12
-rw-r--r--net/core/rtnetlink.c312
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/dccp/minisocks.c7
-rw-r--r--net/decnet/dn_dev.c9
-rw-r--r--net/decnet/dn_fib.c6
-rw-r--r--net/decnet/dn_route.c42
-rw-r--r--net/dsa/Kconfig9
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa2.c2
-rw-r--r--net/dsa/dsa_priv.h9
-rw-r--r--net/dsa/legacy.c20
-rw-r--r--net/dsa/slave.c25
-rw-r--r--net/dsa/switch.c111
-rw-r--r--net/dsa/tag_mtk.c38
-rw-r--r--net/ipv4/inet_hashtables.c184
-rw-r--r--net/ipv4/inet_timewait_sock.c27
-rw-r--r--net/ipv4/ip_gre.c172
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_fastopen.c30
-rw-r--r--net/ipv4/tcp_input.c31
-rw-r--r--net/ipv4/tcp_metrics.c5
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c9
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c44
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv6/addrconf.c48
-rw-r--r--net/ipv6/addrlabel.c25
-rw-r--r--net/ipv6/ila/ila_xlat.c4
-rw-r--r--net/ipv6/inet6_hashtables.c77
-rw-r--r--net/ipv6/ip6_fib.c30
-rw-r--r--net/ipv6/ip6_gre.c649
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ip6_tunnel.c5
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/route.c70
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/udp.c54
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c2
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c34
-rw-r--r--net/mpls/af_mpls.c15
-rw-r--r--net/netfilter/nft_set_hash.c10
-rw-r--r--net/netfilter/xt_policy.c3
-rw-r--r--net/netlink/af_netlink.c71
-rw-r--r--net/netlink/diag.c8
-rw-r--r--net/openvswitch/flow.c6
-rw-r--r--net/openvswitch/flow_netlink.c8
-rw-r--r--net/openvswitch/vport-internal_dev.c9
-rw-r--r--net/phonet/pn_netlink.c21
-rw-r--r--net/qrtr/qrtr.c8
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/rds.h6
-rw-r--r--net/rds/tcp.c38
-rw-r--r--net/rds/tcp.h1
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c10
-rw-r--r--net/sched/act_connmark.c8
-rw-r--r--net/sched/act_csum.c8
-rw-r--r--net/sched/act_gact.c8
-rw-r--r--net/sched/act_ife.c18
-rw-r--r--net/sched/act_ipt.c18
-rw-r--r--net/sched/act_mirred.c27
-rw-r--r--net/sched/act_nat.c8
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/act_police.c8
-rw-r--r--net/sched/act_sample.c10
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c8
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/act_tunnel_key.c10
-rw-r--r--net/sched/act_vlan.c10
-rw-r--r--net/sched/cls_api.c43
-rw-r--r--net/sched/cls_flower.c1
-rw-r--r--net/sched/sch_api.c105
-rw-r--r--net/sched/sch_generic.c440
-rw-r--r--net/sched/sch_mq.c34
-rw-r--r--net/sched/sch_mqprio.c69
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/chunk.c8
-rw-r--r--net/sctp/output.c5
-rw-r--r--net/sctp/outqueue.c12
-rw-r--r--net/sctp/proc.c6
-rw-r--r--net/sctp/sm_make_chunk.c69
-rw-r--r--net/sctp/sm_sideeffect.c47
-rw-r--r--net/sctp/sm_statefuns.c45
-rw-r--r--net/sctp/sm_statetable.c5
-rw-r--r--net/sctp/socket.c149
-rw-r--r--net/sctp/stream.c47
-rw-r--r--net/sctp/stream_interleave.c1334
-rw-r--r--net/sctp/stream_sched.c3
-rw-r--r--net/sctp/sysctl.c7
-rw-r--r--net/sctp/ulpevent.c15
-rw-r--r--net/sctp/ulpqueue.c23
-rw-r--r--net/smc/af_smc.c24
-rw-r--r--net/smc/smc_cdc.c12
-rw-r--r--net/smc/smc_clc.c84
-rw-r--r--net/smc/smc_clc.h34
-rw-r--r--net/smc/smc_close.c2
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_rx.c5
-rw-r--r--net/smc/smc_tx.c11
-rw-r--r--net/tipc/bcast.c12
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/msg.c51
-rw-r--r--net/tipc/msg.h3
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_input.c1
-rw-r--r--net/xfrm/xfrm_output.c2
-rw-r--r--net/xfrm/xfrm_policy.c137
-rw-r--r--net/xfrm/xfrm_replay.c3
-rw-r--r--net/xfrm/xfrm_state.c7
-rw-r--r--samples/bpf/tcbpf2_kern.c170
-rw-r--r--samples/bpf/test_cgrp2_attach2.c36
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh128
-rw-r--r--security/selinux/xfrm.c2
-rw-r--r--tools/testing/selftests/bpf/Makefile5
-rw-r--r--tools/testing/selftests/bpf/sample_ret0.c7
-rw-r--r--tools/testing/selftests/bpf/test_align.c156
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py681
395 files changed, 17508 insertions, 4503 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
new file mode 100644
index 000000000000..cefef855dea4
--- /dev/null
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -0,0 +1,519 @@
+This document provides information for the BPF subsystem about various
+workflows related to reporting bugs, submitting patches, and queueing
+patches for stable kernels.
+
+For general information about submitting patches, please refer to
+Documentation/process/. This document only describes additional specifics
+related to BPF.
+
+Reporting bugs:
+---------------
+
+Q: How do I report bugs for BPF kernel code?
+
+A: Since all BPF kernel development as well as bpftool and iproute2 BPF
+ loader development happens through the netdev kernel mailing list,
+ please report any found issues around BPF to the following mailing
+ list:
+
+ netdev@vger.kernel.org
+
+ This may also include issues related to XDP, BPF tracing, etc.
+
+ Given netdev has a high volume of traffic, please also add the BPF
+ maintainers to Cc (from kernel MAINTAINERS file):
+
+ Alexei Starovoitov <ast@kernel.org>
+ Daniel Borkmann <daniel@iogearbox.net>
+
+ In case a buggy commit has already been identified, make sure to keep
+ the actual commit authors in Cc as well for the report. They can
+ typically be identified through the kernel's git tree.
+
+ Please do *not* report BPF issues to bugzilla.kernel.org since it
+ is a guarantee that the reported issue will be overlooked.
+
+Submitting patches:
+-------------------
+
+Q: To which mailing list do I need to submit my BPF patches?
+
+A: Please submit your BPF patches to the netdev kernel mailing list:
+
+ netdev@vger.kernel.org
+
+ Historically, BPF came out of networking and has always been maintained
+ by the kernel networking community. Although these days BPF touches
+ many other subsystems as well, the patches are still routed mainly
+ through the networking community.
+
+ In case your patch has changes in various different subsystems (e.g.
+ tracing, security, etc), make sure to Cc the related kernel mailing
+ lists and maintainers from there as well, so they are able to review
+ the changes and provide their Acked-by's to the patches.
+
+Q: Where can I find patches currently under discussion for BPF subsystem?
+
+A: All patches that are Cc'ed to netdev are queued for review under netdev
+ patchwork project:
+
+ http://patchwork.ozlabs.org/project/netdev/list/
+
+ Those patches which target BPF, are assigned to a 'bpf' delegate for
+ further processing from BPF maintainers. The current queue with
+ patches under review can be found at:
+
+ https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+
+ Once the patches have been reviewed by the BPF community as a whole
+ and approved by the BPF maintainers, their status in patchwork will be
+ changed to 'Accepted' and the submitter will be notified by mail. This
+ means that the patches look good from a BPF perspective and have been
+ applied to one of the two BPF kernel trees.
+
+ In case feedback from the community requires a respin of the patches,
+ their status in patchwork will be set to 'Changes Requested', and purged
+ from the current review queue. Likewise for cases where patches would
+ get rejected or are not applicable to the BPF trees (but assigned to
+ the 'bpf' delegate).
+
+Q: How do the changes make their way into Linux?
+
+A: There are two BPF kernel trees (git repositories). Once patches have
+ been accepted by the BPF maintainers, they will be applied to one
+ of the two BPF trees:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/
+ https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/
+
+ The bpf tree itself is for fixes only, whereas bpf-next for features,
+ cleanups or other kind of improvements ("next-like" content). This is
+ analogous to net and net-next trees for networking. Both bpf and
+ bpf-next will only have a master branch in order to simplify against
+ which branch patches should get rebased to.
+
+ Accumulated BPF patches in the bpf tree will regularly get pulled
+ into the net kernel tree. Likewise, accumulated BPF patches accepted
+ into the bpf-next tree will make their way into net-next tree. net and
+ net-next are both run by David S. Miller. From there, they will go
+ into the kernel mainline tree run by Linus Torvalds. To read up on the
+ process of net and net-next being merged into the mainline tree, see
+ the netdev FAQ under:
+
+ Documentation/networking/netdev-FAQ.txt
+
+ Occasionally, to prevent merge conflicts, we might send pull requests
+ to other trees (e.g. tracing) with a small subset of the patches, but
+ net and net-next are always the main trees targeted for integration.
+
+ The pull requests will contain a high-level summary of the accumulated
+ patches and can be searched on netdev kernel mailing list through the
+ following subject lines (yyyy-mm-dd is the date of the pull request):
+
+ pull-request: bpf yyyy-mm-dd
+ pull-request: bpf-next yyyy-mm-dd
+
+Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be
+ applied to?
+
+A: The process is the very same as described in the netdev FAQ, so
+ please read up on it. The subject line must indicate whether the
+ patch is a fix or rather "next-like" content in order to let the
+ maintainers know whether it is targeted at bpf or bpf-next.
+
+ For fixes eventually landing in bpf -> net tree, the subject must
+ look like:
+
+ git format-patch --subject-prefix='PATCH bpf' start..finish
+
+ For features/improvements/etc that should eventually land in
+ bpf-next -> net-next, the subject must look like:
+
+ git format-patch --subject-prefix='PATCH bpf-next' start..finish
+
+ If unsure whether the patch or patch series should go into bpf
+ or net directly, or bpf-next or net-next directly, it is not a
+ problem either if the subject line says net or net-next as target.
+ It is eventually up to the maintainers to do the delegation of
+ the patches.
+
+ If it is clear that patches should go into bpf or bpf-next tree,
+ please make sure to rebase the patches against those trees in
+ order to reduce potential conflicts.
+
+ In case the patch or patch series has to be reworked and sent out
+ again in a second or later revision, it is also required to add a
+ version number (v2, v3, ...) into the subject prefix:
+
+ git format-patch --subject-prefix='PATCH net-next v2' start..finish
+
+ When changes have been requested to the patch series, always send the
+ whole patch series again with the feedback incorporated (never send
+ individual diffs on top of the old series).
+
+Q: What does it mean when a patch gets applied to bpf or bpf-next tree?
+
+A: It means that the patch looks good for mainline inclusion from
+ a BPF point of view.
+
+ Be aware that this is not a final verdict that the patch will
+ automatically get accepted into net or net-next trees eventually:
+
+ On the netdev kernel mailing list reviews can come in at any point
+ in time. If discussions around a patch conclude that they cannot
+ get included as-is, we will either apply a follow-up fix or drop
+ them from the trees entirely. Therefore, we also reserve to rebase
+ the trees when deemed necessary. After all, the purpose of the tree
+ is to i) accumulate and stage BPF patches for integration into trees
+ like net and net-next, and ii) run extensive BPF test suite and
+ workloads on the patches before they make their way any further.
+
+ Once the BPF pull request was accepted by David S. Miller, then
+ the patches end up in net or net-next tree, respectively, and
+ make their way from there further into mainline. Again, see the
+ netdev FAQ for additional information e.g. on how often they are
+ merged to mainline.
+
+Q: How long do I need to wait for feedback on my BPF patches?
+
+A: We try to keep the latency low. The usual time to feedback will
+ be around 2 or 3 business days. It may vary depending on the
+ complexity of changes and current patch load.
+
+Q: How often do you send pull requests to major kernel trees like
+ net or net-next?
+
+A: Pull requests will be sent out rather often in order to not
+ accumulate too many patches in bpf or bpf-next.
+
+ As a rule of thumb, expect pull requests for each tree regularly
+ at the end of the week. In some cases pull requests could additionally
+ come also in the middle of the week depending on the current patch
+ load or urgency.
+
+Q: Are patches applied to bpf-next when the merge window is open?
+
+A: For the time when the merge window is open, bpf-next will not be
+ processed. This is roughly analogous to net-next patch processing,
+ so feel free to read up on the netdev FAQ about further details.
+
+ During those two weeks of merge window, we might ask you to resend
+ your patch series once bpf-next is open again. Once Linus released
+ a v*-rc1 after the merge window, we continue processing of bpf-next.
+
+ For non-subscribers to kernel mailing lists, there is also a status
+ page run by David S. Miller on net-next that provides guidance:
+
+ http://vger.kernel.org/~davem/net-next.html
+
+Q: I made a BPF verifier change, do I need to add test cases for
+ BPF kernel selftests?
+
+A: If the patch has changes to the behavior of the verifier, then yes,
+ it is absolutely necessary to add test cases to the BPF kernel
+ selftests suite. If they are not present and we think they are
+ needed, then we might ask for them before accepting any changes.
+
+ In particular, test_verifier.c is tracking a high number of BPF test
+ cases, including a lot of corner cases that LLVM BPF back end may
+ generate out of the restricted C code. Thus, adding test cases is
+ absolutely crucial to make sure future changes do not accidentally
+ affect prior use-cases. Thus, treat those test cases as: verifier
+ behavior that is not tracked in test_verifier.c could potentially
+ be subject to change.
+
+Q: When should I add code to samples/bpf/ and when to BPF kernel
+ selftests?
+
+A: In general, we prefer additions to BPF kernel selftests rather than
+ samples/bpf/. The rationale is very simple: kernel selftests are
+ regularly run by various bots to test for kernel regressions.
+
+ The more test cases we add to BPF selftests, the better the coverage
+ and the less likely it is that those could accidentally break. It is
+ not that BPF kernel selftests cannot demo how a specific feature can
+ be used.
+
+ That said, samples/bpf/ may be a good place for people to get started,
+ so it might be advisable that simple demos of features could go into
+ samples/bpf/, but advanced functional and corner-case testing rather
+ into kernel selftests.
+
+ If your sample looks like a test case, then go for BPF kernel selftests
+ instead!
+
+Q: When should I add code to the bpftool?
+
+A: The main purpose of bpftool (under tools/bpf/bpftool/) is to provide
+ a central user space tool for debugging and introspection of BPF programs
+ and maps that are active in the kernel. If UAPI changes related to BPF
+ enable for dumping additional information of programs or maps, then
+ bpftool should be extended as well to support dumping them.
+
+Q: When should I add code to iproute2's BPF loader?
+
+A: For UAPI changes related to the XDP or tc layer (e.g. cls_bpf), the
+ convention is that those control-path related changes are added to
+ iproute2's BPF loader as well from user space side. This is not only
+ useful to have UAPI changes properly designed to be usable, but also
+ to make those changes available to a wider user base of major
+ downstream distributions.
+
+Q: Do you accept patches as well for iproute2's BPF loader?
+
+A: Patches for the iproute2's BPF loader have to be sent to:
+
+ netdev@vger.kernel.org
+
+ While those patches are not processed by the BPF kernel maintainers,
+ please keep them in Cc as well, so they can be reviewed.
+
+ The official git repository for iproute2 is run by Stephen Hemminger
+ and can be found at:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git/
+
+ The patches need to have a subject prefix of '[PATCH iproute2 master]'
+ or '[PATCH iproute2 net-next]'. 'master' or 'net-next' describes the
+ target branch where the patch should be applied to. Meaning, if kernel
+ changes went into the net-next kernel tree, then the related iproute2
+ changes need to go into the iproute2 net-next branch, otherwise they
+ can be targeted at master branch. The iproute2 net-next branch will get
+ merged into the master branch after the current iproute2 version from
+ master has been released.
+
+ Like BPF, the patches end up in patchwork under the netdev project and
+ are delegated to 'shemminger' for further processing:
+
+ http://patchwork.ozlabs.org/project/netdev/list/?delegate=389
+
+Q: What is the minimum requirement before I submit my BPF patches?
+
+A: When submitting patches, always take the time and properly test your
+ patches *prior* to submission. Never rush them! If maintainers find
+ that your patches have not been properly tested, it is a good way to
+ get them grumpy. Testing patch submissions is a hard requirement!
+
+ Note, fixes that go to bpf tree *must* have a Fixes: tag included. The
+ same applies to fixes that target bpf-next, where the affected commit
+ is in net-next (or in some cases bpf-next). The Fixes: tag is crucial
+ in order to identify follow-up commits and tremendously helps for people
+ having to do backporting, so it is a must have!
+
+ We also don't accept patches with an empty commit message. Take your
+ time and properly write up a high quality commit message, it is
+ essential!
+
+ Think about it this way: other developers looking at your code a month
+ from now need to understand *why* a certain change has been done that
+ way, and whether there have been flaws in the analysis or assumptions
+ that the original author did. Thus providing a proper rationale and
+ describing the use-case for the changes is a must.
+
+ Patch submissions with >1 patch must have a cover letter which includes
+ a high level description of the series. This high level summary will
+ then be placed into the merge commit by the BPF maintainers such that
+ it is also accessible from the git log for future reference.
+
+Q: What do I need to consider when adding a new instruction or feature
+ that would require BPF JIT and/or LLVM integration as well?
+
+A: We try hard to keep all BPF JITs up to date such that the same user
+ experience can be guaranteed when running BPF programs on different
+ architectures without having the program punt to the less efficient
+ interpreter in case the in-kernel BPF JIT is enabled.
+
+ If you are unable to implement or test the required JIT changes for
+ certain architectures, please work together with the related BPF JIT
+ developers in order to get the feature implemented in a timely manner.
+ Please refer to the git log (arch/*/net/) to locate the necessary
+ people for helping out.
+
+ Also always make sure to add BPF test cases (e.g. test_bpf.c and
+ test_verifier.c) for new instructions, so that they can receive
+ broad test coverage and help run-time testing the various BPF JITs.
+
+ In case of new BPF instructions, once the changes have been accepted
+ into the Linux kernel, please implement support into LLVM's BPF back
+ end. See LLVM section below for further information.
+
+Stable submission:
+------------------
+
+Q: I need a specific BPF commit in stable kernels. What should I do?
+
+A: In case you need a specific fix in stable kernels, first check whether
+ the commit has already been applied in the related linux-*.y branches:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/
+
+ If not the case, then drop an email to the BPF maintainers with the
+ netdev kernel mailing list in Cc and ask for the fix to be queued up:
+
+ netdev@vger.kernel.org
+
+ The process in general is the same as on netdev itself, see also the
+ netdev FAQ document.
+
+Q: Do you also backport to kernels not currently maintained as stable?
+
+A: No. If you need a specific BPF commit in kernels that are currently not
+ maintained by the stable maintainers, then you are on your own.
+
+ The current stable and longterm stable kernels are all listed here:
+
+ https://www.kernel.org/
+
+Q: The BPF patch I am about to submit needs to go to stable as well. What
+ should I do?
+
+A: The same rules apply as with netdev patch submissions in general, see
+ netdev FAQ under:
+
+ Documentation/networking/netdev-FAQ.txt
+
+ Never add "Cc: stable@vger.kernel.org" to the patch description, but
+ ask the BPF maintainers to queue the patches instead. This can be done
+ with a note, for example, under the "---" part of the patch which does
+ not go into the git log. Alternatively, this can be done as a simple
+ request by mail instead.
+
+Q: Where do I find currently queued BPF patches that will be submitted
+ to stable?
+
+A: Once patches that fix critical bugs got applied into the bpf tree, they
+ are queued up for stable submission under:
+
+ http://patchwork.ozlabs.org/bundle/bpf/stable/?state=*
+
+ They will be on hold there at minimum until the related commit made its
+ way into the mainline kernel tree.
+
+ After having been under broader exposure, the queued patches will be
+ submitted by the BPF maintainers to the stable maintainers.
+
+Testing patches:
+----------------
+
+Q: Which BPF kernel selftests version should I run my kernel against?
+
+A: If you run a kernel xyz, then always run the BPF kernel selftests from
+ that kernel xyz as well. Do not expect that the BPF selftest from the
+ latest mainline tree will pass all the time.
+
+ In particular, test_bpf.c and test_verifier.c have a large number of
+ test cases and are constantly updated with new BPF test sequences, or
+ existing ones are adapted to verifier changes e.g. due to verifier
+ becoming smarter and being able to better track certain things.
+
+LLVM:
+-----
+
+Q: Where do I find LLVM with BPF support?
+
+A: The BPF back end for LLVM is upstream in LLVM since version 3.7.1.
+
+ All major distributions these days ship LLVM with BPF back end enabled,
+ so for the majority of use-cases it is not required to compile LLVM by
+ hand anymore, just install the distribution provided package.
+
+ LLVM's static compiler lists the supported targets through 'llc --version',
+ make sure BPF targets are listed. Example:
+
+ $ llc --version
+ LLVM (http://llvm.org/):
+ LLVM version 6.0.0svn
+ Optimized build.
+ Default target: x86_64-unknown-linux-gnu
+ Host CPU: skylake
+
+ Registered Targets:
+ bpf - BPF (host endian)
+ bpfeb - BPF (big endian)
+ bpfel - BPF (little endian)
+ x86 - 32-bit X86: Pentium-Pro and above
+ x86-64 - 64-bit X86: EM64T and AMD64
+
+ For developers in order to utilize the latest features added to LLVM's
+ BPF back end, it is advisable to run the latest LLVM releases. Support
+ for new BPF kernel features such as additions to the BPF instruction
+ set are often developed together.
+
+ All LLVM releases can be found at: http://releases.llvm.org/
+
+Q: Got it, so how do I build LLVM manually anyway?
+
+A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have
+ that set up, proceed with building the latest LLVM and clang version
+ from the git repositories:
+
+ $ git clone http://llvm.org/git/llvm.git
+ $ cd llvm/tools
+ $ git clone --depth 1 http://llvm.org/git/clang.git
+ $ cd ..; mkdir build; cd build
+ $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+ -DBUILD_SHARED_LIBS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_BUILD_RUNTIME=OFF
+ $ make -j $(getconf _NPROCESSORS_ONLN)
+
+ The built binaries can then be found in the build/bin/ directory, where
+ you can point the PATH variable to.
+
+Q: Should I notify BPF kernel maintainers about issues in LLVM's BPF code
+ generation back end or about LLVM generated code that the verifier
+ refuses to accept?
+
+A: Yes, please do! LLVM's BPF back end is a key piece of the whole BPF
+ infrastructure and it ties deeply into verification of programs from the
+ kernel side. Therefore, any issues on either side need to be investigated
+ and fixed whenever necessary.
+
+ Therefore, please make sure to bring them up at netdev kernel mailing
+ list and Cc BPF maintainers for LLVM and kernel bits:
+
+ Yonghong Song <yhs@fb.com>
+ Alexei Starovoitov <ast@kernel.org>
+ Daniel Borkmann <daniel@iogearbox.net>
+
+ LLVM also has an issue tracker where BPF related bugs can be found:
+
+ https://bugs.llvm.org/buglist.cgi?quicksearch=bpf
+
+ However, it is better to reach out through mailing lists with having
+ maintainers in Cc.
+
+Q: I have added a new BPF instruction to the kernel, how can I integrate
+ it into LLVM?
+
+A: LLVM has a -mcpu selector for the BPF back end in order to allow the
+ selection of BPF instruction set extensions. By default the 'generic'
+ processor target is used, which is the base instruction set (v1) of BPF.
+
+ LLVM has an option to select -mcpu=probe where it will probe the host
+ kernel for supported BPF instruction set extensions and selects the
+ optimal set automatically.
+
+ For cross-compilation, a specific version can be select manually as well.
+
+ $ llc -march bpf -mcpu=help
+ Available CPUs for this target:
+
+ generic - Select the generic processor.
+ probe - Select the probe processor.
+ v1 - Select the v1 processor.
+ v2 - Select the v2 processor.
+ [...]
+
+ Newly added BPF instructions to the Linux kernel need to follow the same
+ scheme, bump the instruction set version and implement probing for the
+ extensions such that -mcpu=probe users can benefit from the optimization
+ transparently when upgrading their kernels.
+
+ If you are unable to implement support for the newly added BPF instruction
+ please reach out to BPF developers for help.
+
+ By the way, the BPF kernel selftests run with -mcpu=probe for better
+ test coverage.
+
+Happy BPF hacking!
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
index 9a734d808aa7..b7336b9d6a3c 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -2,7 +2,10 @@
Required properties:
-- compatible: should be "brcm,bcm7445-switch-v4.0" or "brcm,bcm7278-switch-v4.0"
+- compatible: should be one of
+ "brcm,bcm7445-switch-v4.0"
+ "brcm,bcm7278-switch-v4.0"
+ "brcm,bcm7278-switch-v4.8"
- reg: addresses and length of the register sets for the device, must be 6
pairs of register addresses and lengths
- interrupts: interrupts for the devices, must be two interrupts
diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
index 56d6cc336e1c..bfc0c433654f 100644
--- a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
+++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
@@ -18,6 +18,12 @@ Optional properties:
- xceiver-supply: Regulator that powers the CAN transceiver
+- big-endian: This means the registers of FlexCAN controller are big endian.
+ This is optional property.i.e. if this property is not present in
+ device tree node then controller is assumed to be little endian.
+ if this property is present then controller is assumed to be big
+ endian.
+
Example:
can@1c000 {
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index f0dc94409107..2d41fb96ce0a 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -59,7 +59,7 @@ ethernet@83fec000 {
reg = <0x83fec000 0x4000>;
interrupts = <87>;
phy-mode = "mii";
- phy-reset-gpios = <&gpio2 14 0>; /* GPIO2_14 */
+ phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
local-mac-address = [00 04 9F 01 1B B9];
phy-supply = <&reg_fec_supply>;
};
@@ -71,7 +71,7 @@ ethernet@83fec000 {
reg = <0x83fec000 0x4000>;
interrupts = <87>;
phy-mode = "mii";
- phy-reset-gpios = <&gpio2 14 0>; /* GPIO2_14 */
+ phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
local-mac-address = [00 04 9F 01 1B B9];
phy-supply = <&reg_fec_supply>;
phy-handle = <&ethphy>;
diff --git a/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt b/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt
index dea5124cdc52..d24172cc6d32 100644
--- a/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt
+++ b/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt
@@ -1,7 +1,7 @@
* ADF7242 IEEE 802.15.4 *
Required properties:
- - compatible: should be "adi,adf7242"
+ - compatible: should be "adi,adf7242", "adi,adf7241"
- spi-max-frequency: maximal bus speed (12.5 MHz)
- reg: the chipselect index
- interrupts: the interrupt generated by the device via pin IRQ1.
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
index 77d0b2a61ffa..72860ce7f610 100644
--- a/Documentation/devicetree/bindings/net/phy.txt
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -53,6 +53,14 @@ Optional Properties:
to ensure the integrated PHY is used. The absence of this property indicates
the muxers should be configured so that the external PHY is used.
+- reset-gpios: The GPIO phandle and specifier for the PHY reset signal.
+
+- reset-delay-us: Delay after the reset was asserted in microseconds.
+ If this property is missing the delay will be skipped.
+
+- reset-post-delay-us: Delay after the reset was deasserted in microseconds.
+ If this property is missing the delay will be skipped.
+
Example:
ethernet-phy@0 {
@@ -60,4 +68,8 @@ ethernet-phy@0 {
interrupt-parent = <&PIC>;
interrupts = <35 IRQ_TYPE_EDGE_RISING>;
reg = <0>;
+
+ reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+ reset-delay-us = <1000>;
+ reset-post-delay-us = <2000>;
};
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index 60e970ce10ee..f1c441bedf68 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -3,7 +3,9 @@ Transceiver
Required properties:
-- compatible : must be "sff,sfp"
+- compatible : must be one of
+ "sff,sfp" for SFP modules
+ "sff,sff" for soldered down SFF modules
Optional Properties:
@@ -11,7 +13,8 @@ Optional Properties:
interface
- mod-def0-gpios : GPIO phandle and a specifier of the MOD-DEF0 (AKA Mod_ABS)
- module presence input gpio signal, active (module absent) high
+ module presence input gpio signal, active (module absent) high. Must
+ not be present for SFF modules
- los-gpios : GPIO phandle and a specifier of the Receiver Loss of Signal
Indication input gpio signal, active (signal lost) high
@@ -24,10 +27,11 @@ Optional Properties:
- rate-select0-gpios : GPIO phandle and a specifier of the Rx Signaling Rate
Select (AKA RS0) output gpio signal, low: low Rx rate, high: high Rx rate
+ Must not be present for SFF modules
- rate-select1-gpios : GPIO phandle and a specifier of the Tx Signaling Rate
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
- high Tx rate
+ high Tx rate. Must not be present for SFF modules
Example #1: Direct serdes to SFP connection
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 7a79b3587dd3..f5d642c01dd3 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -228,6 +228,8 @@ x25.txt
- general info on X.25 development.
x25-iface.txt
- description of the X.25 Packet Layer to LAPB device interface.
+xfrm_device.txt
+ - description of XFRM offload API
xfrm_proc.txt
- description of the statistics package for XFRM.
xfrm_sync.txt
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index b8b40753133e..25170ad7d25b 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -385,11 +385,6 @@ Switch configuration
avoid relying on what a previous software agent such as a bootloader/firmware
may have previously configured.
-- set_addr: Some switches require the programming of the management interface's
- Ethernet MAC address, switch drivers can also disable ageing of MAC addresses
- on the management interface and "hardcode"/"force" this MAC address for the
- CPU/management interface as an optimization
-
PHY devices and link management
-------------------------------
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
index 057e9fdbfac9..e74d8e1da0e2 100644
--- a/Documentation/networking/ieee802154.txt
+++ b/Documentation/networking/ieee802154.txt
@@ -97,6 +97,46 @@ The include/net/mac802154.h defines following functions:
- void ieee802154_unregister_hw(struct ieee802154_hw *hw):
freeing registered PHY
+ - void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb,
+ u8 lqi):
+ telling 802.15.4 module there is a new received frame in the skb with
+ the RF Link Quality Indicator (LQI) from the hardware device
+
+ - void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
+ bool ifs_handling):
+ telling 802.15.4 module the frame in the skb is or going to be
+ transmitted through the hardware device
+
+The device driver must implement the following callbacks in the IEEE 802.15.4
+operations structure at least:
+struct ieee802154_ops {
+ ...
+ int (*start)(struct ieee802154_hw *hw);
+ void (*stop)(struct ieee802154_hw *hw);
+ ...
+ int (*xmit_async)(struct ieee802154_hw *hw, struct sk_buff *skb);
+ int (*ed)(struct ieee802154_hw *hw, u8 *level);
+ int (*set_channel)(struct ieee802154_hw *hw, u8 page, u8 channel);
+ ...
+};
+
+ - int start(struct ieee802154_hw *hw):
+ handler that 802.15.4 module calls for the hardware device initialization.
+
+ - void stop(struct ieee802154_hw *hw):
+ handler that 802.15.4 module calls for the hardware device cleanup.
+
+ - int xmit_async(struct ieee802154_hw *hw, struct sk_buff *skb):
+ handler that 802.15.4 module calls for each frame in the skb going to be
+ transmitted through the hardware device.
+
+ - int ed(struct ieee802154_hw *hw, u8 *level):
+ handler that 802.15.4 module calls for Energy Detection from the hardware
+ device.
+
+ - int set_channel(struct ieee802154_hw *hw, u8 page, u8 channel):
+ set radio for listening on specific channel of the hardware device.
+
Moreover IEEE 802.15.4 device operations structure should be filled.
Fake drivers
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 46c7e1085efc..3f2c40d8e6aa 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -606,6 +606,7 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
This time period will grow exponentially when more blackhole issues
get detected right after Fastopen is re-enabled and will reset to
initial value when the blackhole issue goes away.
+ 0 to disable the blackhole detection.
By default, it is set to 1hr.
tcp_syn_retries - INTEGER
diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst
index 580289f345da..f03ae64be8bc 100644
--- a/Documentation/networking/kapi.rst
+++ b/Documentation/networking/kapi.rst
@@ -145,3 +145,27 @@ PHY Support
.. kernel-doc:: drivers/net/phy/mdio_bus.c
:internal:
+
+PHYLINK
+-------
+
+ PHYLINK interfaces traditional network drivers with PHYLIB, fixed-links,
+ and SFF modules (eg, hot-pluggable SFP) that may contain PHYs. PHYLINK
+ provides management of the link state and link modes.
+
+.. kernel-doc:: include/linux/phylink.h
+ :internal:
+
+.. kernel-doc:: drivers/net/phy/phylink.c
+
+SFP support
+-----------
+
+.. kernel-doc:: drivers/net/phy/sfp-bus.c
+ :internal:
+
+.. kernel-doc:: include/linux/sfp.h
+ :internal:
+
+.. kernel-doc:: drivers/net/phy/sfp-bus.c
+ :export:
diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt
new file mode 100644
index 000000000000..2d9d588cd34b
--- /dev/null
+++ b/Documentation/networking/xfrm_device.txt
@@ -0,0 +1,132 @@
+
+===============================================
+XFRM device - offloading the IPsec computations
+===============================================
+Shannon Nelson <shannon.nelson@oracle.com>
+
+
+Overview
+========
+
+IPsec is a useful feature for securing network traffic, but the
+computational cost is high: a 10Gbps link can easily be brought down
+to under 1Gbps, depending on the traffic and link configuration.
+Luckily, there are NICs that offer a hardware based IPsec offload which
+can radically increase throughput and decrease CPU utilization. The XFRM
+Device interface allows NIC drivers to offer to the stack access to the
+hardware offload.
+
+Userland access to the offload is typically through a system such as
+libreswan or KAME/raccoon, but the iproute2 'ip xfrm' command set can
+be handy when experimenting. An example command might look something
+like this:
+
+ ip x s add proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport \
+ reqid 0x07 replay-window 32 \
+ aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
+ sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp \
+ offload dev eth4 dir in
+
+Yes, that's ugly, but that's what shell scripts and/or libreswan are for.
+
+
+
+Callbacks to implement
+======================
+
+/* from include/linux/netdevice.h */
+struct xfrmdev_ops {
+ int (*xdo_dev_state_add) (struct xfrm_state *x);
+ void (*xdo_dev_state_delete) (struct xfrm_state *x);
+ void (*xdo_dev_state_free) (struct xfrm_state *x);
+ bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
+ struct xfrm_state *x);
+};
+
+The NIC driver offering ipsec offload will need to implement these
+callbacks to make the offload available to the network stack's
+XFRM subsytem. Additionally, the feature bits NETIF_F_HW_ESP and
+NETIF_F_HW_ESP_TX_CSUM will signal the availability of the offload.
+
+
+
+Flow
+====
+
+At probe time and before the call to register_netdev(), the driver should
+set up local data structures and XFRM callbacks, and set the feature bits.
+The XFRM code's listener will finish the setup on NETDEV_REGISTER.
+
+ adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+ adapter->netdev->features |= NETIF_F_HW_ESP;
+ adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+When new SAs are set up with a request for "offload" feature, the
+driver's xdo_dev_state_add() will be given the new SA to be offloaded
+and an indication of whether it is for Rx or Tx. The driver should
+ - verify the algorithm is supported for offloads
+ - store the SA information (key, salt, target-ip, protocol, etc)
+ - enable the HW offload of the SA
+
+The driver can also set an offload_handle in the SA, an opaque void pointer
+that can be used to convey context into the fast-path offload requests.
+
+ xs->xso.offload_handle = context;
+
+
+When the network stack is preparing an IPsec packet for an SA that has
+been setup for offload, it first calls into xdo_dev_offload_ok() with
+the skb and the intended offload state to ask the driver if the offload
+will serviceable. This can check the packet information to be sure the
+offload can be supported (e.g. IPv4 or IPv6, no IPv4 options, etc) and
+return true of false to signify its support.
+
+When ready to send, the driver needs to inspect the Tx packet for the
+offload information, including the opaque context, and set up the packet
+send accordingly.
+
+ xs = xfrm_input_state(skb);
+ context = xs->xso.offload_handle;
+ set up HW for send
+
+The stack has already inserted the appropriate IPsec headers in the
+packet data, the offload just needs to do the encryption and fix up the
+header values.
+
+
+When a packet is received and the HW has indicated that it offloaded a
+decryption, the driver needs to add a reference to the decoded SA into
+the packet's skb. At this point the data should be decrypted but the
+IPsec headers are still in the packet data; they are removed later up
+the stack in xfrm_input().
+
+ find and hold the SA that was used to the Rx skb
+ get spi, protocol, and destination IP from packet headers
+ xs = find xs from (spi, protocol, dest_IP)
+ xfrm_state_hold(xs);
+
+ store the state information into the skb
+ skb->sp = secpath_dup(skb->sp);
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+
+ indicate the success and/or error status of the offload
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ xo->status = crypto_status;
+
+ hand the packet to napi_gro_receive() as usual
+
+
+When the SA is removed by the user, the driver's xdo_dev_state_delete()
+is asked to disable the offload. Later, xdo_dev_state_free() is called
+from a garbage collection routine after all reference counts to the state
+have been removed and any remaining resources can be cleared for the
+offload state. How these are used by the driver will depend on specific
+hardware needs.
+
+As a netdev is set to DOWN the XFRM stack's netdev listener will call
+xdo_dev_state_delete() and xdo_dev_state_free() on any remaining offloaded
+states.
+
+
diff --git a/MAINTAINERS b/MAINTAINERS
index a6e86e20761e..810415d10b03 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2725,12 +2725,16 @@ M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
L: netdev@vger.kernel.org
L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
S: Supported
F: arch/x86/net/bpf_jit*
F: Documentation/networking/filter.txt
F: Documentation/bpf/
F: include/linux/bpf*
F: include/linux/filter.h
+F: include/trace/events/bpf.h
+F: include/trace/events/xdp.h
F: include/uapi/linux/bpf*
F: include/uapi/linux/filter.h
F: kernel/bpf/
@@ -8724,6 +8728,13 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/mediatek/
+MEDIATEK SWITCH DRIVER
+M: Sean Wang <sean.wang@mediatek.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/dsa/mt7530.*
+F: net/dsa/tag_mtk.c
+
MEDIATEK JPEG DRIVER
M: Rick Chang <rick.chang@mediatek.com>
M: Bin Liu <bin.liu@mediatek.com>
@@ -9600,6 +9611,11 @@ NETWORKING [WIRELESS]
L: linux-wireless@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-wireless/list/
+NETDEVSIM
+M: Jakub Kicinski <jakub.kicinski@netronome.com>
+S: Maintained
+F: drivers/net/netdevsim/*
+
NETXEN (1/10) GbE SUPPORT
M: Manish Chopra <manish.chopra@cavium.com>
M: Rahul Verma <rahul.verma@cavium.com>
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index 09ce8b81fafa..fcaff1c66bcb 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -122,7 +122,7 @@
};
can1: can@43f88000 {
- compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx25-flexcan";
reg = <0x43f88000 0x4000>;
interrupts = <43>;
clocks = <&clks 75>, <&clks 75>;
@@ -131,7 +131,7 @@
};
can2: can@43f8c000 {
- compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx25-flexcan";
reg = <0x43f8c000 0x4000>;
interrupts = <44>;
clocks = <&clks 76>, <&clks 76>;
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 2f4ebe0318d3..e52e05c0fe56 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -1038,7 +1038,7 @@
};
can0: can@80032000 {
- compatible = "fsl,imx28-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx28-flexcan";
reg = <0x80032000 0x2000>;
interrupts = <8>;
clocks = <&clks 58>, <&clks 58>;
@@ -1047,7 +1047,7 @@
};
can1: can@80034000 {
- compatible = "fsl,imx28-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx28-flexcan";
reg = <0x80034000 0x2000>;
interrupts = <9>;
clocks = <&clks 59>, <&clks 59>;
diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
index 6d5e6a60bee7..1f0e2203b576 100644
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@ -303,7 +303,7 @@
};
can1: can@53fe4000 {
- compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx35-flexcan";
reg = <0x53fe4000 0x1000>;
clocks = <&clks 33>, <&clks 33>;
clock-names = "ipg", "per";
@@ -312,7 +312,7 @@
};
can2: can@53fe8000 {
- compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx35-flexcan";
reg = <0x53fe8000 0x1000>;
clocks = <&clks 34>, <&clks 34>;
clock-names = "ipg", "per";
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 84f17f7abb71..85071ff8c639 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -536,7 +536,7 @@
};
can1: can@53fc8000 {
- compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx53-flexcan";
reg = <0x53fc8000 0x4000>;
interrupts = <82>;
clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>,
@@ -546,7 +546,7 @@
};
can2: can@53fcc000 {
- compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ compatible = "fsl,imx53-flexcan";
reg = <0x53fcc000 0x4000>;
interrupts = <83>;
clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>,
diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts
index 940875316d0f..4f211e3c903a 100644
--- a/arch/arm/boot/dts/ls1021a-qds.dts
+++ b/arch/arm/boot/dts/ls1021a-qds.dts
@@ -331,3 +331,19 @@
&uart1 {
status = "okay";
};
+
+&can0 {
+ status = "okay";
+};
+
+&can1 {
+ status = "okay";
+};
+
+&can2 {
+ status = "disabled";
+};
+
+&can3 {
+ status = "disabled";
+};
diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts
index a8b148ad1dd2..7202d9c504be 100644
--- a/arch/arm/boot/dts/ls1021a-twr.dts
+++ b/arch/arm/boot/dts/ls1021a-twr.dts
@@ -243,3 +243,19 @@
&uart1 {
status = "okay";
};
+
+&can0 {
+ status = "okay";
+};
+
+&can1 {
+ status = "okay";
+};
+
+&can2 {
+ status = "disabled";
+};
+
+&can3 {
+ status = "disabled";
+};
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index 9319e1f0f1d8..7789031898b0 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -730,5 +730,41 @@
<0000 0 0 3 &gic GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>,
<0000 0 0 4 &gic GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
};
+
+ can0: can@2a70000 {
+ compatible = "fsl,ls1021ar2-flexcan";
+ reg = <0x0 0x2a70000 0x0 0x1000>;
+ interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ clock-names = "ipg", "per";
+ big-endian;
+ };
+
+ can1: can@2a80000 {
+ compatible = "fsl,ls1021ar2-flexcan";
+ reg = <0x0 0x2a80000 0x0 0x1000>;
+ interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ clock-names = "ipg", "per";
+ big-endian;
+ };
+
+ can2: can@2a90000 {
+ compatible = "fsl,ls1021ar2-flexcan";
+ reg = <0x0 0x2a90000 0x0 0x1000>;
+ interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ clock-names = "ipg", "per";
+ big-endian;
+ };
+
+ can3: can@2aa0000 {
+ compatible = "fsl,ls1021ar2-flexcan";
+ reg = <0x0 0x2aa0000 0x0 0x1000>;
+ interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ clock-names = "ipg", "per";
+ big-endian;
+ };
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index af12ead88c5f..1b4aafc1f6a2 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -137,12 +137,14 @@
compatible = "fsl,p1010-flexcan";
reg = <0x1c000 0x1000>;
interrupts = <48 0x2 0 0>;
+ big-endian;
};
can1: can@1d000 {
compatible = "fsl,p1010-flexcan";
reg = <0x1d000 0x1000>;
interrupts = <61 0x2 0 0>;
+ big-endian;
};
L2: l2-cache-controller@20000 {
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index ce47eb17901d..6470e3c4c990 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -473,7 +473,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
ENI_PRV_POS(skb) = eni_vcc->descr+size+1;
skb_queue_tail(&eni_dev->rx_queue,skb);
eni_vcc->last = skb;
-rx_enqueued++;
+ rx_enqueued++;
}
eni_vcc->descr = here;
eni_out(dma_wr,MID_DMA_WR_RX);
@@ -715,7 +715,7 @@ static void get_service(struct atm_dev *dev)
else eni_dev->slow = vcc;
eni_dev->last_slow = vcc;
}
-putting++;
+ putting++;
ENI_VCC(vcc)->servicing++;
}
}
@@ -744,7 +744,7 @@ static void dequeue_rx(struct atm_dev *dev)
}
EVENT("dequeued (size=%ld,pos=0x%lx)\n",ENI_PRV_SIZE(skb),
ENI_PRV_POS(skb));
-rx_dequeued++;
+ rx_dequeued++;
vcc = ATM_SKB(skb)->vcc;
eni_vcc = ENI_VCC(vcc);
first = 0;
@@ -1174,7 +1174,7 @@ DPRINTK("doing direct send\n"); /* @@@ well, this doesn't work anyway */
DPRINTK("dma_wr set to %d, tx_pos is now %ld\n",dma_wr,tx->tx_pos);
eni_out(dma_wr,MID_DMA_WR_TX);
skb_queue_tail(&eni_dev->tx_queue,skb);
-queued++;
+ queued++;
return enq_ok;
}
@@ -1195,7 +1195,7 @@ static void poll_tx(struct atm_dev *dev)
if (res == enq_ok) continue;
DPRINTK("re-queuing TX PDU\n");
skb_queue_head(&tx->backlog,skb);
-requeued++;
+ requeued++;
if (res == enq_jam) return;
break;
}
@@ -1232,7 +1232,7 @@ static void dequeue_tx(struct atm_dev *dev)
else dev_kfree_skb_irq(skb);
atomic_inc(&vcc->stats->tx);
wake_up(&eni_dev->tx_wait);
-dma_complete++;
+ dma_complete++;
}
}
@@ -1555,7 +1555,7 @@ static void eni_tasklet(unsigned long data)
}
if (events & MID_TX_COMPLETE) {
EVENT("INT: TX COMPLETE\n",0,0);
-tx_complete++;
+ tx_complete++;
wake_up(&eni_dev->tx_wait);
/* poll_rx ? */
}
@@ -2069,14 +2069,14 @@ static int eni_send(struct atm_vcc *vcc,struct sk_buff *skb)
}
*(u32 *) skb->data = htonl(*(u32 *) skb->data);
}
-submitted++;
+ submitted++;
ATM_SKB(skb)->vcc = vcc;
tasklet_disable(&ENI_DEV(vcc->dev)->task);
res = do_tx(skb);
tasklet_enable(&ENI_DEV(vcc->dev)->task);
if (res == enq_ok) return 0;
skb_queue_tail(&ENI_VCC(vcc)->tx->backlog,skb);
-backlogged++;
+ backlogged++;
tasklet_schedule(&ENI_DEV(vcc->dev)->task);
return 0;
}
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 12eb8caa4263..50e071444a5c 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -140,6 +140,29 @@ static u32 hv_copyto_ringbuffer(
return start_write_offset;
}
+/*
+ *
+ * hv_get_ringbuffer_availbytes()
+ *
+ * Get number of bytes available to read and to write to
+ * for the specified ring buffer
+ */
+static void
+hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
+ u32 *read, u32 *write)
+{
+ u32 read_loc, write_loc, dsize;
+
+ /* Capture the read/write indices before they changed */
+ read_loc = READ_ONCE(rbi->ring_buffer->read_index);
+ write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+ dsize = rbi->ring_datasize;
+
+ *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
+ read_loc - write_loc;
+ *read = dsize - *write;
+}
+
/* Get various debug metrics for the specified ring buffer. */
void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
struct hv_ring_buffer_debug_info *debug_info)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0936da592e12..944ec3c9282c 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -497,4 +497,15 @@ config THUNDERBOLT_NET
source "drivers/net/hyperv/Kconfig"
+config NETDEVSIM
+ tristate "Simulated networking device"
+ depends on DEBUG_FS
+ help
+ This driver is a developer testing tool and software model that can
+ be used to test various control path networking APIs, especially
+ HW-offload related.
+
+ To compile this driver as a module, choose M here: the module
+ will be called netdevsim.
+
endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 766f62d02a0b..04c3b747812c 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_FUJITSU_ES) += fjes/
thunderbolt-net-y += thunderbolt.o
obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o
+obj-$(CONFIG_NETDEVSIM) += netdevsim/
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index d065c0e2d18e..406b4847e5dc 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -251,14 +251,14 @@ static void c_can_pci_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
-static struct c_can_pci_data c_can_sta2x11= {
+static const struct c_can_pci_data c_can_sta2x11= {
.type = BOSCH_C_CAN,
.reg_align = C_CAN_REG_ALIGN_32,
.freq = 52000000, /* 52 Mhz */
.bar = 0,
};
-static struct c_can_pci_data c_can_pch = {
+static const struct c_can_pci_data c_can_pch = {
.type = BOSCH_C_CAN,
.reg_align = C_CAN_REG_32,
.freq = 50000000, /* 50 MHz */
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 0626dcfd1f3d..3cd371c94e83 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -190,6 +190,7 @@
* MX53 FlexCAN2 03.00.00.00 yes no no no no
* MX6s FlexCAN3 10.00.12.00 yes yes no no yes
* VF610 FlexCAN3 ? no yes no yes yes?
+ * LS1021A FlexCAN2 03.00.04.00 no yes no no yes
*
* Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
*/
@@ -279,6 +280,10 @@ struct flexcan_priv {
struct clk *clk_per;
const struct flexcan_devtype_data *devtype_data;
struct regulator *reg_xceiver;
+
+ /* Read and Write APIs */
+ u32 (*read)(void __iomem *addr);
+ void (*write)(u32 val, void __iomem *addr);
};
static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
@@ -301,6 +306,12 @@ static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
FLEXCAN_QUIRK_BROKEN_PERR_STATE,
};
+static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
+};
+
static const struct can_bittiming_const flexcan_bittiming_const = {
.name = DRV_NAME,
.tseg1_min = 4,
@@ -313,39 +324,45 @@ static const struct can_bittiming_const flexcan_bittiming_const = {
.brp_inc = 1,
};
-/* Abstract off the read/write for arm versus ppc. This
- * assumes that PPC uses big-endian registers and everything
- * else uses little-endian registers, independent of CPU
- * endianness.
+/* FlexCAN module is essentially modelled as a little-endian IP in most
+ * SoCs, i.e the registers as well as the message buffer areas are
+ * implemented in a little-endian fashion.
+ *
+ * However there are some SoCs (e.g. LS1021A) which implement the FlexCAN
+ * module in a big-endian fashion (i.e the registers as well as the
+ * message buffer areas are implemented in a big-endian way).
+ *
+ * In addition, the FlexCAN module can be found on SoCs having ARM or
+ * PPC cores. So, we need to abstract off the register read/write
+ * functions, ensuring that these cater to all the combinations of module
+ * endianness and underlying CPU endianness.
*/
-#if defined(CONFIG_PPC)
-static inline u32 flexcan_read(void __iomem *addr)
+static inline u32 flexcan_read_be(void __iomem *addr)
{
- return in_be32(addr);
+ return ioread32be(addr);
}
-static inline void flexcan_write(u32 val, void __iomem *addr)
+static inline void flexcan_write_be(u32 val, void __iomem *addr)
{
- out_be32(addr, val);
+ iowrite32be(val, addr);
}
-#else
-static inline u32 flexcan_read(void __iomem *addr)
+
+static inline u32 flexcan_read_le(void __iomem *addr)
{
- return readl(addr);
+ return ioread32(addr);
}
-static inline void flexcan_write(u32 val, void __iomem *addr)
+static inline void flexcan_write_le(u32 val, void __iomem *addr)
{
- writel(val, addr);
+ iowrite32(val, addr);
}
-#endif
static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
u32 reg_ctrl = (priv->reg_ctrl_default | FLEXCAN_CTRL_ERR_MSK);
- flexcan_write(reg_ctrl, &regs->ctrl);
+ priv->write(reg_ctrl, &regs->ctrl);
}
static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv)
@@ -353,7 +370,7 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv)
struct flexcan_regs __iomem *regs = priv->regs;
u32 reg_ctrl = (priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_MSK);
- flexcan_write(reg_ctrl, &regs->ctrl);
+ priv->write(reg_ctrl, &regs->ctrl);
}
static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv)
@@ -378,14 +395,14 @@ static int flexcan_chip_enable(struct flexcan_priv *priv)
unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
u32 reg;
- reg = flexcan_read(&regs->mcr);
+ reg = priv->read(&regs->mcr);
reg &= ~FLEXCAN_MCR_MDIS;
- flexcan_write(reg, &regs->mcr);
+ priv->write(reg, &regs->mcr);
- while (timeout-- && (flexcan_read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
+ while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
udelay(10);
- if (flexcan_read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK)
+ if (priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK)
return -ETIMEDOUT;
return 0;
@@ -397,14 +414,14 @@ static int flexcan_chip_disable(struct flexcan_priv *priv)
unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
u32 reg;
- reg = flexcan_read(&regs->mcr);
+ reg = priv->read(&regs->mcr);
reg |= FLEXCAN_MCR_MDIS;
- flexcan_write(reg, &regs->mcr);
+ priv->write(reg, &regs->mcr);
- while (timeout-- && !(flexcan_read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
+ while (timeout-- && !(priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
udelay(10);
- if (!(flexcan_read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
+ if (!(priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
return -ETIMEDOUT;
return 0;
@@ -416,14 +433,14 @@ static int flexcan_chip_freeze(struct flexcan_priv *priv)
unsigned int timeout = 1000 * 1000 * 10 / priv->can.bittiming.bitrate;
u32 reg;
- reg = flexcan_read(&regs->mcr);
+ reg = priv->read(&regs->mcr);
reg |= FLEXCAN_MCR_HALT;
- flexcan_write(reg, &regs->mcr);
+ priv->write(reg, &regs->mcr);
- while (timeout-- && !(flexcan_read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
+ while (timeout-- && !(priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
udelay(100);
- if (!(flexcan_read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
+ if (!(priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
return -ETIMEDOUT;
return 0;
@@ -435,14 +452,14 @@ static int flexcan_chip_unfreeze(struct flexcan_priv *priv)
unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
u32 reg;
- reg = flexcan_read(&regs->mcr);
+ reg = priv->read(&regs->mcr);
reg &= ~FLEXCAN_MCR_HALT;
- flexcan_write(reg, &regs->mcr);
+ priv->write(reg, &regs->mcr);
- while (timeout-- && (flexcan_read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
+ while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
udelay(10);
- if (flexcan_read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK)
+ if (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK)
return -ETIMEDOUT;
return 0;
@@ -453,11 +470,11 @@ static int flexcan_chip_softreset(struct flexcan_priv *priv)
struct flexcan_regs __iomem *regs = priv->regs;
unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
- flexcan_write(FLEXCAN_MCR_SOFTRST, &regs->mcr);
- while (timeout-- && (flexcan_read(&regs->mcr) & FLEXCAN_MCR_SOFTRST))
+ priv->write(FLEXCAN_MCR_SOFTRST, &regs->mcr);
+ while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_SOFTRST))
udelay(10);
- if (flexcan_read(&regs->mcr) & FLEXCAN_MCR_SOFTRST)
+ if (priv->read(&regs->mcr) & FLEXCAN_MCR_SOFTRST)
return -ETIMEDOUT;
return 0;
@@ -468,7 +485,7 @@ static int __flexcan_get_berr_counter(const struct net_device *dev,
{
const struct flexcan_priv *priv = netdev_priv(dev);
struct flexcan_regs __iomem *regs = priv->regs;
- u32 reg = flexcan_read(&regs->ecr);
+ u32 reg = priv->read(&regs->ecr);
bec->txerr = (reg >> 0) & 0xff;
bec->rxerr = (reg >> 8) & 0xff;
@@ -524,24 +541,24 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (cf->can_dlc > 0) {
data = be32_to_cpup((__be32 *)&cf->data[0]);
- flexcan_write(data, &priv->tx_mb->data[0]);
+ priv->write(data, &priv->tx_mb->data[0]);
}
if (cf->can_dlc > 3) {
data = be32_to_cpup((__be32 *)&cf->data[4]);
- flexcan_write(data, &priv->tx_mb->data[1]);
+ priv->write(data, &priv->tx_mb->data[1]);
}
can_put_echo_skb(skb, dev, 0);
- flexcan_write(can_id, &priv->tx_mb->can_id);
- flexcan_write(ctrl, &priv->tx_mb->can_ctrl);
+ priv->write(can_id, &priv->tx_mb->can_id);
+ priv->write(ctrl, &priv->tx_mb->can_ctrl);
/* Errata ERR005829 step8:
* Write twice INACTIVE(0x8) code to first MB.
*/
- flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE,
+ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
&priv->tx_mb_reserved->can_ctrl);
- flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE,
+ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
&priv->tx_mb_reserved->can_ctrl);
return NETDEV_TX_OK;
@@ -660,7 +677,7 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload,
u32 code;
do {
- reg_ctrl = flexcan_read(&mb->can_ctrl);
+ reg_ctrl = priv->read(&mb->can_ctrl);
} while (reg_ctrl & FLEXCAN_MB_CODE_RX_BUSY_BIT);
/* is this MB empty? */
@@ -675,17 +692,17 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload,
offload->dev->stats.rx_errors++;
}
} else {
- reg_iflag1 = flexcan_read(&regs->iflag1);
+ reg_iflag1 = priv->read(&regs->iflag1);
if (!(reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE))
return 0;
- reg_ctrl = flexcan_read(&mb->can_ctrl);
+ reg_ctrl = priv->read(&mb->can_ctrl);
}
/* increase timstamp to full 32 bit */
*timestamp = reg_ctrl << 16;
- reg_id = flexcan_read(&mb->can_id);
+ reg_id = priv->read(&mb->can_id);
if (reg_ctrl & FLEXCAN_MB_CNT_IDE)
cf->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
@@ -695,19 +712,19 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload,
cf->can_id |= CAN_RTR_FLAG;
cf->can_dlc = get_can_dlc((reg_ctrl >> 16) & 0xf);
- *(__be32 *)(cf->data + 0) = cpu_to_be32(flexcan_read(&mb->data[0]));
- *(__be32 *)(cf->data + 4) = cpu_to_be32(flexcan_read(&mb->data[1]));
+ *(__be32 *)(cf->data + 0) = cpu_to_be32(priv->read(&mb->data[0]));
+ *(__be32 *)(cf->data + 4) = cpu_to_be32(priv->read(&mb->data[1]));
/* mark as read */
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
/* Clear IRQ */
if (n < 32)
- flexcan_write(BIT(n), &regs->iflag1);
+ priv->write(BIT(n), &regs->iflag1);
else
- flexcan_write(BIT(n - 32), &regs->iflag2);
+ priv->write(BIT(n - 32), &regs->iflag2);
} else {
- flexcan_write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, &regs->iflag1);
- flexcan_read(&regs->timer);
+ priv->write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, &regs->iflag1);
+ priv->read(&regs->timer);
}
return 1;
@@ -719,8 +736,8 @@ static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv)
struct flexcan_regs __iomem *regs = priv->regs;
u32 iflag1, iflag2;
- iflag2 = flexcan_read(&regs->iflag2) & priv->reg_imask2_default;
- iflag1 = flexcan_read(&regs->iflag1) & priv->reg_imask1_default &
+ iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default;
+ iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default &
~FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
return (u64)iflag2 << 32 | iflag1;
@@ -736,7 +753,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
u32 reg_iflag1, reg_esr;
enum can_state last_state = priv->can.state;
- reg_iflag1 = flexcan_read(&regs->iflag1);
+ reg_iflag1 = priv->read(&regs->iflag1);
/* reception interrupt */
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
@@ -759,7 +776,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
/* FIFO overflow interrupt */
if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_OVERFLOW) {
handled = IRQ_HANDLED;
- flexcan_write(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW, &regs->iflag1);
+ priv->write(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW,
+ &regs->iflag1);
dev->stats.rx_over_errors++;
dev->stats.rx_errors++;
}
@@ -773,18 +791,18 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
can_led_event(dev, CAN_LED_EVENT_TX);
/* after sending a RTR frame MB is in RX mode */
- flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE,
- &priv->tx_mb->can_ctrl);
- flexcan_write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag1);
+ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
+ &priv->tx_mb->can_ctrl);
+ priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag1);
netif_wake_queue(dev);
}
- reg_esr = flexcan_read(&regs->esr);
+ reg_esr = priv->read(&regs->esr);
/* ACK all bus error and state change IRQ sources */
if (reg_esr & FLEXCAN_ESR_ALL_INT) {
handled = IRQ_HANDLED;
- flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, &regs->esr);
+ priv->write(reg_esr & FLEXCAN_ESR_ALL_INT, &regs->esr);
}
/* state change interrupt or broken error state quirk fix is enabled */
@@ -846,7 +864,7 @@ static void flexcan_set_bittiming(struct net_device *dev)
struct flexcan_regs __iomem *regs = priv->regs;
u32 reg;
- reg = flexcan_read(&regs->ctrl);
+ reg = priv->read(&regs->ctrl);
reg &= ~(FLEXCAN_CTRL_PRESDIV(0xff) |
FLEXCAN_CTRL_RJW(0x3) |
FLEXCAN_CTRL_PSEG1(0x7) |
@@ -870,11 +888,11 @@ static void flexcan_set_bittiming(struct net_device *dev)
reg |= FLEXCAN_CTRL_SMP;
netdev_dbg(dev, "writing ctrl=0x%08x\n", reg);
- flexcan_write(reg, &regs->ctrl);
+ priv->write(reg, &regs->ctrl);
/* print chip status */
netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__,
- flexcan_read(&regs->mcr), flexcan_read(&regs->ctrl));
+ priv->read(&regs->mcr), priv->read(&regs->ctrl));
}
/* flexcan_chip_start
@@ -913,7 +931,7 @@ static int flexcan_chip_start(struct net_device *dev)
* choose format C
* set max mailbox number
*/
- reg_mcr = flexcan_read(&regs->mcr);
+ reg_mcr = priv->read(&regs->mcr);
reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff);
reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV |
FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ |
@@ -927,7 +945,7 @@ static int flexcan_chip_start(struct net_device *dev)
FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
}
netdev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr);
- flexcan_write(reg_mcr, &regs->mcr);
+ priv->write(reg_mcr, &regs->mcr);
/* CTRL
*
@@ -940,7 +958,7 @@ static int flexcan_chip_start(struct net_device *dev)
* enable bus off interrupt
* (== FLEXCAN_CTRL_ERR_STATE)
*/
- reg_ctrl = flexcan_read(&regs->ctrl);
+ reg_ctrl = priv->read(&regs->ctrl);
reg_ctrl &= ~FLEXCAN_CTRL_TSYN;
reg_ctrl |= FLEXCAN_CTRL_BOFF_REC | FLEXCAN_CTRL_LBUF |
FLEXCAN_CTRL_ERR_STATE;
@@ -960,45 +978,45 @@ static int flexcan_chip_start(struct net_device *dev)
/* leave interrupts disabled for now */
reg_ctrl &= ~FLEXCAN_CTRL_ERR_ALL;
netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl);
- flexcan_write(reg_ctrl, &regs->ctrl);
+ priv->write(reg_ctrl, &regs->ctrl);
if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) {
- reg_ctrl2 = flexcan_read(&regs->ctrl2);
+ reg_ctrl2 = priv->read(&regs->ctrl2);
reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS;
- flexcan_write(reg_ctrl2, &regs->ctrl2);
+ priv->write(reg_ctrl2, &regs->ctrl2);
}
/* clear and invalidate all mailboxes first */
for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) {
- flexcan_write(FLEXCAN_MB_CODE_RX_INACTIVE,
- &regs->mb[i].can_ctrl);
+ priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
+ &regs->mb[i].can_ctrl);
}
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++)
- flexcan_write(FLEXCAN_MB_CODE_RX_EMPTY,
- &regs->mb[i].can_ctrl);
+ priv->write(FLEXCAN_MB_CODE_RX_EMPTY,
+ &regs->mb[i].can_ctrl);
}
/* Errata ERR005829: mark first TX mailbox as INACTIVE */
- flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE,
- &priv->tx_mb_reserved->can_ctrl);
+ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
+ &priv->tx_mb_reserved->can_ctrl);
/* mark TX mailbox as INACTIVE */
- flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE,
- &priv->tx_mb->can_ctrl);
+ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
+ &priv->tx_mb->can_ctrl);
/* acceptance mask/acceptance code (accept everything) */
- flexcan_write(0x0, &regs->rxgmask);
- flexcan_write(0x0, &regs->rx14mask);
- flexcan_write(0x0, &regs->rx15mask);
+ priv->write(0x0, &regs->rxgmask);
+ priv->write(0x0, &regs->rx14mask);
+ priv->write(0x0, &regs->rx15mask);
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_RXFG)
- flexcan_write(0x0, &regs->rxfgmask);
+ priv->write(0x0, &regs->rxfgmask);
/* clear acceptance filters */
for (i = 0; i < ARRAY_SIZE(regs->mb); i++)
- flexcan_write(0, &regs->rximr[i]);
+ priv->write(0, &regs->rximr[i]);
/* On Vybrid, disable memory error detection interrupts
* and freeze mode.
@@ -1011,16 +1029,16 @@ static int flexcan_chip_start(struct net_device *dev)
* and Correction of Memory Errors" to write to
* MECR register
*/
- reg_ctrl2 = flexcan_read(&regs->ctrl2);
+ reg_ctrl2 = priv->read(&regs->ctrl2);
reg_ctrl2 |= FLEXCAN_CTRL2_ECRWRE;
- flexcan_write(reg_ctrl2, &regs->ctrl2);
+ priv->write(reg_ctrl2, &regs->ctrl2);
- reg_mecr = flexcan_read(&regs->mecr);
+ reg_mecr = priv->read(&regs->mecr);
reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS;
- flexcan_write(reg_mecr, &regs->mecr);
+ priv->write(reg_mecr, &regs->mecr);
reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK |
FLEXCAN_MECR_FANCEI_MSK);
- flexcan_write(reg_mecr, &regs->mecr);
+ priv->write(reg_mecr, &regs->mecr);
}
err = flexcan_transceiver_enable(priv);
@@ -1036,14 +1054,14 @@ static int flexcan_chip_start(struct net_device *dev)
/* enable interrupts atomically */
disable_irq(dev->irq);
- flexcan_write(priv->reg_ctrl_default, &regs->ctrl);
- flexcan_write(priv->reg_imask1_default, &regs->imask1);
- flexcan_write(priv->reg_imask2_default, &regs->imask2);
+ priv->write(priv->reg_ctrl_default, &regs->ctrl);
+ priv->write(priv->reg_imask1_default, &regs->imask1);
+ priv->write(priv->reg_imask2_default, &regs->imask2);
enable_irq(dev->irq);
/* print chip status */
netdev_dbg(dev, "%s: reading mcr=0x%08x ctrl=0x%08x\n", __func__,
- flexcan_read(&regs->mcr), flexcan_read(&regs->ctrl));
+ priv->read(&regs->mcr), priv->read(&regs->ctrl));
return 0;
@@ -1068,10 +1086,10 @@ static void flexcan_chip_stop(struct net_device *dev)
flexcan_chip_disable(priv);
/* Disable all interrupts */
- flexcan_write(0, &regs->imask2);
- flexcan_write(0, &regs->imask1);
- flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL,
- &regs->ctrl);
+ priv->write(0, &regs->imask2);
+ priv->write(0, &regs->imask1);
+ priv->write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL,
+ &regs->ctrl);
flexcan_transceiver_disable(priv);
priv->can.state = CAN_STATE_STOPPED;
@@ -1186,26 +1204,26 @@ static int register_flexcandev(struct net_device *dev)
err = flexcan_chip_disable(priv);
if (err)
goto out_disable_per;
- reg = flexcan_read(&regs->ctrl);
+ reg = priv->read(&regs->ctrl);
reg |= FLEXCAN_CTRL_CLK_SRC;
- flexcan_write(reg, &regs->ctrl);
+ priv->write(reg, &regs->ctrl);
err = flexcan_chip_enable(priv);
if (err)
goto out_chip_disable;
/* set freeze, halt and activate FIFO, restrict register access */
- reg = flexcan_read(&regs->mcr);
+ reg = priv->read(&regs->mcr);
reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT |
FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV;
- flexcan_write(reg, &regs->mcr);
+ priv->write(reg, &regs->mcr);
/* Currently we only support newer versions of this core
* featuring a RX hardware FIFO (although this driver doesn't
* make use of it on some cores). Older cores, found on some
* Coldfire derivates are not tested.
*/
- reg = flexcan_read(&regs->mcr);
+ reg = priv->read(&regs->mcr);
if (!(reg & FLEXCAN_MCR_FEN)) {
netdev_err(dev, "Could not enable RX FIFO, unsupported core\n");
err = -ENODEV;
@@ -1233,8 +1251,12 @@ static void unregister_flexcandev(struct net_device *dev)
static const struct of_device_id flexcan_of_match[] = {
{ .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
{ .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
+ { .compatible = "fsl,imx53-flexcan", .data = &fsl_p1010_devtype_data, },
+ { .compatible = "fsl,imx35-flexcan", .data = &fsl_p1010_devtype_data, },
+ { .compatible = "fsl,imx25-flexcan", .data = &fsl_p1010_devtype_data, },
{ .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, },
{ .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, },
+ { .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, flexcan_of_match);
@@ -1314,6 +1336,21 @@ static int flexcan_probe(struct platform_device *pdev)
dev->flags |= IFF_ECHO;
priv = netdev_priv(dev);
+
+ if (of_property_read_bool(pdev->dev.of_node, "big-endian")) {
+ priv->read = flexcan_read_be;
+ priv->write = flexcan_write_be;
+ } else {
+ if (of_device_is_compatible(pdev->dev.of_node,
+ "fsl,p1010-flexcan")) {
+ priv->read = flexcan_read_be;
+ priv->write = flexcan_write_be;
+ } else {
+ priv->read = flexcan_read_le;
+ priv->write = flexcan_write_le;
+ }
+ }
+
priv->can.clock.freq = clock_freq;
priv->can.bittiming_const = &flexcan_bittiming_const;
priv->can.do_set_mode = flexcan_set_mode;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 25a9b79cc42d..f530a80f5051 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -408,7 +408,6 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
{
struct sk_buff *skb;
struct can_frame *cf;
- struct timeval tv;
enum can_state new_state;
/* ignore this error until 1st ts received */
@@ -525,8 +524,8 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) {
struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb);
- peak_usb_get_ts_tv(&mc->pdev->time_ref, mc->ts16, &tv);
- hwts->hwtstamp = timeval_to_ktime(tv);
+ peak_usb_get_ts_time(&mc->pdev->time_ref, mc->ts16,
+ &hwts->hwtstamp);
}
mc->netdev->stats.rx_packets++;
@@ -610,7 +609,6 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
u8 rec_len = status_len & PCAN_USB_STATUSLEN_DLC;
struct sk_buff *skb;
struct can_frame *cf;
- struct timeval tv;
struct skb_shared_hwtstamps *hwts;
skb = alloc_can_skb(mc->netdev, &cf);
@@ -658,9 +656,8 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
}
/* convert timestamp into kernel time */
- peak_usb_get_ts_tv(&mc->pdev->time_ref, mc->ts16, &tv);
hwts = skb_hwtstamps(skb);
- hwts->hwtstamp = timeval_to_ktime(tv);
+ peak_usb_get_ts_time(&mc->pdev->time_ref, mc->ts16, &hwts->hwtstamp);
/* update statistics */
mc->netdev->stats.rx_packets++;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 1ca76e03e965..8f699ee6a528 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -80,21 +80,6 @@ void peak_usb_init_time_ref(struct peak_time_ref *time_ref,
}
}
-static void peak_usb_add_us(struct timeval *tv, u32 delta_us)
-{
- /* number of s. to add to final time */
- u32 delta_s = delta_us / 1000000;
-
- delta_us -= delta_s * 1000000;
-
- tv->tv_usec += delta_us;
- if (tv->tv_usec >= 1000000) {
- tv->tv_usec -= 1000000;
- delta_s++;
- }
- tv->tv_sec += delta_s;
-}
-
/*
* sometimes, another now may be more recent than current one...
*/
@@ -103,7 +88,7 @@ void peak_usb_update_ts_now(struct peak_time_ref *time_ref, u32 ts_now)
time_ref->ts_dev_2 = ts_now;
/* should wait at least two passes before computing */
- if (time_ref->tv_host.tv_sec > 0) {
+ if (ktime_to_ns(time_ref->tv_host) > 0) {
u32 delta_ts = time_ref->ts_dev_2 - time_ref->ts_dev_1;
if (time_ref->ts_dev_2 < time_ref->ts_dev_1)
@@ -118,26 +103,26 @@ void peak_usb_update_ts_now(struct peak_time_ref *time_ref, u32 ts_now)
*/
void peak_usb_set_ts_now(struct peak_time_ref *time_ref, u32 ts_now)
{
- if (time_ref->tv_host_0.tv_sec == 0) {
+ if (ktime_to_ns(time_ref->tv_host_0) == 0) {
/* use monotonic clock to correctly compute further deltas */
- time_ref->tv_host_0 = ktime_to_timeval(ktime_get());
- time_ref->tv_host.tv_sec = 0;
+ time_ref->tv_host_0 = ktime_get();
+ time_ref->tv_host = ktime_set(0, 0);
} else {
/*
- * delta_us should not be >= 2^32 => delta_s should be < 4294
+ * delta_us should not be >= 2^32 => delta should be < 4294s
* handle 32-bits wrapping here: if count of s. reaches 4200,
* reset counters and change time base
*/
- if (time_ref->tv_host.tv_sec != 0) {
- u32 delta_s = time_ref->tv_host.tv_sec
- - time_ref->tv_host_0.tv_sec;
- if (delta_s > 4200) {
+ if (ktime_to_ns(time_ref->tv_host)) {
+ ktime_t delta = ktime_sub(time_ref->tv_host,
+ time_ref->tv_host_0);
+ if (ktime_to_ns(delta) > (4200ull * NSEC_PER_SEC)) {
time_ref->tv_host_0 = time_ref->tv_host;
time_ref->ts_total = 0;
}
}
- time_ref->tv_host = ktime_to_timeval(ktime_get());
+ time_ref->tv_host = ktime_get();
time_ref->tick_count++;
}
@@ -146,13 +131,12 @@ void peak_usb_set_ts_now(struct peak_time_ref *time_ref, u32 ts_now)
}
/*
- * compute timeval according to current ts and time_ref data
+ * compute time according to current ts and time_ref data
*/
-void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts,
- struct timeval *tv)
+void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time)
{
- /* protect from getting timeval before setting now */
- if (time_ref->tv_host.tv_sec > 0) {
+ /* protect from getting time before setting now */
+ if (ktime_to_ns(time_ref->tv_host)) {
u64 delta_us;
delta_us = ts - time_ref->ts_dev_2;
@@ -164,10 +148,9 @@ void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts,
delta_us *= time_ref->adapter->us_per_ts_scale;
delta_us >>= time_ref->adapter->us_per_ts_shift;
- *tv = time_ref->tv_host_0;
- peak_usb_add_us(tv, (u32)delta_us);
+ *time = ktime_add_us(time_ref->tv_host_0, delta_us);
} else {
- *tv = ktime_to_timeval(ktime_get());
+ *time = ktime_get();
}
}
@@ -178,10 +161,8 @@ int peak_usb_netif_rx(struct sk_buff *skb,
struct peak_time_ref *time_ref, u32 ts_low, u32 ts_high)
{
struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb);
- struct timeval tv;
- peak_usb_get_ts_tv(time_ref, ts_low, &tv);
- hwts->hwtstamp = timeval_to_ktime(tv);
+ peak_usb_get_ts_time(time_ref, ts_low, &hwts->hwtstamp);
return netif_rx(skb);
}
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index c01316cac354..29f03dccca10 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -96,7 +96,7 @@ extern const struct peak_usb_adapter pcan_usb_pro_fd;
extern const struct peak_usb_adapter pcan_usb_x6;
struct peak_time_ref {
- struct timeval tv_host_0, tv_host;
+ ktime_t tv_host_0, tv_host;
u32 ts_dev_1, ts_dev_2;
u64 ts_total;
u32 tick_count;
@@ -151,8 +151,7 @@ void peak_usb_init_time_ref(struct peak_time_ref *time_ref,
const struct peak_usb_adapter *adapter);
void peak_usb_update_ts_now(struct peak_time_ref *time_ref, u32 ts_now);
void peak_usb_set_ts_now(struct peak_time_ref *time_ref, u32 ts_now);
-void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts,
- struct timeval *tv);
+void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *tv);
int peak_usb_netif_rx(struct sk_buff *skb,
struct peak_time_ref *time_ref, u32 ts_low, u32 ts_high);
void peak_usb_async_complete(struct urb *urb);
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index bbdd6058cd2f..0105fbfea273 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -531,7 +531,6 @@ static int pcan_usb_pro_handle_canmsg(struct pcan_usb_pro_interface *usb_if,
struct net_device *netdev = dev->netdev;
struct can_frame *can_frame;
struct sk_buff *skb;
- struct timeval tv;
struct skb_shared_hwtstamps *hwts;
skb = alloc_can_skb(netdev, &can_frame);
@@ -549,9 +548,9 @@ static int pcan_usb_pro_handle_canmsg(struct pcan_usb_pro_interface *usb_if,
else
memcpy(can_frame->data, rx->data, can_frame->can_dlc);
- peak_usb_get_ts_tv(&usb_if->time_ref, le32_to_cpu(rx->ts32), &tv);
hwts = skb_hwtstamps(skb);
- hwts->hwtstamp = timeval_to_ktime(tv);
+ peak_usb_get_ts_time(&usb_if->time_ref, le32_to_cpu(rx->ts32),
+ &hwts->hwtstamp);
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += can_frame->can_dlc;
@@ -571,7 +570,6 @@ static int pcan_usb_pro_handle_error(struct pcan_usb_pro_interface *usb_if,
enum can_state new_state = CAN_STATE_ERROR_ACTIVE;
u8 err_mask = 0;
struct sk_buff *skb;
- struct timeval tv;
struct skb_shared_hwtstamps *hwts;
/* nothing should be sent while in BUS_OFF state */
@@ -667,9 +665,8 @@ static int pcan_usb_pro_handle_error(struct pcan_usb_pro_interface *usb_if,
dev->can.state = new_state;
- peak_usb_get_ts_tv(&usb_if->time_ref, le32_to_cpu(er->ts32), &tv);
hwts = skb_hwtstamps(skb);
- hwts->hwtstamp = timeval_to_ktime(tv);
+ peak_usb_get_ts_time(&usb_if->time_ref, le32_to_cpu(er->ts32), &hwts->hwtstamp);
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += can_frame->can_dlc;
netif_rx(skb);
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 8404e8852a0f..5d1753cfacea 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -227,10 +227,8 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
netif_carrier_off(peer);
err = rtnl_configure_link(peer, ifmp);
- if (err < 0) {
- unregister_netdevice(peer);
- return err;
- }
+ if (err < 0)
+ goto unregister_network_device;
/* register first device */
if (tb[IFLA_IFNAME])
@@ -239,10 +237,8 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
err = register_netdevice(dev);
- if (err < 0) {
- unregister_netdevice(peer);
- return err;
- }
+ if (err < 0)
+ goto unregister_network_device;
netif_carrier_off(dev);
@@ -254,6 +250,10 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
rcu_assign_pointer(priv->peer, dev);
return 0;
+
+unregister_network_device:
+ unregister_netdevice(peer);
+ return err;
}
static void vxcan_dellink(struct net_device *dev, struct list_head *head)
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 83a9bc892a3b..2b81b97e994f 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -33,7 +33,7 @@ config NET_DSA_MT7530
config NET_DSA_MV88E6060
tristate "Marvell 88E6060 ethernet switch chip support"
- depends on NET_DSA
+ depends on NET_DSA && NET_DSA_LEGACY
select NET_DSA_TAG_TRAILER
---help---
This enables support for the Marvell 88E6060 ethernet switch
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index f5a8dd96fd75..561b05089cb6 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1029,8 +1029,7 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
EXPORT_SYMBOL(b53_vlan_filtering);
int b53_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
struct b53_device *dev = ds->priv;
@@ -1047,8 +1046,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port,
EXPORT_SYMBOL(b53_vlan_prepare);
void b53_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
struct b53_device *dev = ds->priv;
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
@@ -1495,8 +1493,7 @@ static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
return false;
}
-static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds,
- int port)
+enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port)
{
struct b53_device *dev = ds->priv;
@@ -1514,6 +1511,7 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds,
return DSA_TAG_PROTO_BRCM;
}
+EXPORT_SYMBOL(b53_get_tag_protocol);
int b53_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror, bool ingress)
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index daaaa1ecb996..d954cf36ecd8 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -295,11 +295,9 @@ void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
void b53_br_fast_age(struct dsa_switch *ds, int port);
int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering);
int b53_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
+ const struct switchdev_obj_port_vlan *vlan);
void b53_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
+ const struct switchdev_obj_port_vlan *vlan);
int b53_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
int b53_fdb_add(struct dsa_switch *ds, int port,
@@ -310,6 +308,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data);
int b53_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
+enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port);
void b53_mirror_del(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror);
int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index b62d47210db8..0378eded31f2 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -34,12 +34,6 @@
#include "b53/b53_priv.h"
#include "b53/b53_regs.h"
-static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds,
- int port)
-{
- return DSA_TAG_PROTO_BRCM;
-}
-
static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
@@ -860,7 +854,7 @@ static const struct b53_io_ops bcm_sf2_io_ops = {
};
static const struct dsa_switch_ops bcm_sf2_ops = {
- .get_tag_protocol = bcm_sf2_sw_get_tag_protocol,
+ .get_tag_protocol = b53_get_tag_protocol,
.setup = bcm_sf2_sw_setup,
.get_strings = b53_get_strings,
.get_ethtool_stats = b53_get_ethtool_stats,
@@ -954,6 +948,9 @@ static const struct of_device_id bcm_sf2_of_match[] = {
{ .compatible = "brcm,bcm7278-switch-v4.0",
.data = &bcm_sf2_7278_data
},
+ { .compatible = "brcm,bcm7278-switch-v4.8",
+ .data = &bcm_sf2_7278_data
+ },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, bcm_sf2_of_match);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index bb71d3d6f65b..7aa84ee4e771 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -174,9 +174,9 @@ static int dsa_loop_port_vlan_filtering(struct dsa_switch *ds, int port,
return 0;
}
-static int dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+static int
+dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
{
struct dsa_loop_priv *ps = ds->priv;
struct mii_bus *bus = ps->bus;
@@ -193,8 +193,7 @@ static int dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port,
}
static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index b24566bb74d2..f412aad58253 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -249,6 +249,29 @@ static int lan9303_read(struct regmap *regmap, unsigned int offset, u32 *reg)
return -EIO;
}
+/* Wait a while until mask & reg == value. Otherwise return timeout. */
+static int lan9303_read_wait(struct lan9303 *chip, int offset, u32 mask)
+{
+ int i;
+
+ for (i = 0; i < 25; i++) {
+ u32 reg;
+ int ret;
+
+ ret = lan9303_read(chip->regmap, offset, &reg);
+ if (ret) {
+ dev_err(chip->dev, "%s failed to read offset %d: %d\n",
+ __func__, offset, ret);
+ return ret;
+ }
+ if (!(reg & mask))
+ return 0;
+ usleep_range(1000, 2000);
+ }
+
+ return -ETIMEDOUT;
+}
+
static int lan9303_virt_phy_reg_read(struct lan9303 *chip, int regnum)
{
int ret;
@@ -274,22 +297,8 @@ static int lan9303_virt_phy_reg_write(struct lan9303 *chip, int regnum, u16 val)
static int lan9303_indirect_phy_wait_for_completion(struct lan9303 *chip)
{
- int ret, i;
- u32 reg;
-
- for (i = 0; i < 25; i++) {
- ret = lan9303_read(chip->regmap, LAN9303_PMI_ACCESS, &reg);
- if (ret) {
- dev_err(chip->dev,
- "Failed to read pmi access status: %d\n", ret);
- return ret;
- }
- if (!(reg & LAN9303_PMI_ACCESS_MII_BUSY))
- return 0;
- usleep_range(1000, 2000);
- }
-
- return -EIO;
+ return lan9303_read_wait(chip, LAN9303_PMI_ACCESS,
+ LAN9303_PMI_ACCESS_MII_BUSY);
}
static int lan9303_indirect_phy_read(struct lan9303 *chip, int addr, int regnum)
@@ -366,22 +375,8 @@ EXPORT_SYMBOL_GPL(lan9303_indirect_phy_ops);
static int lan9303_switch_wait_for_completion(struct lan9303 *chip)
{
- int ret, i;
- u32 reg;
-
- for (i = 0; i < 25; i++) {
- ret = lan9303_read(chip->regmap, LAN9303_SWITCH_CSR_CMD, &reg);
- if (ret) {
- dev_err(chip->dev,
- "Failed to read csr command status: %d\n", ret);
- return ret;
- }
- if (!(reg & LAN9303_SWITCH_CSR_CMD_BUSY))
- return 0;
- usleep_range(1000, 2000);
- }
-
- return -EIO;
+ return lan9303_read_wait(chip, LAN9303_SWITCH_CSR_CMD,
+ LAN9303_SWITCH_CSR_CMD_BUSY);
}
static int lan9303_write_switch_reg(struct lan9303 *chip, u16 regnum, u32 val)
@@ -583,6 +578,7 @@ static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx)
{
int i;
+ mutex_lock(&chip->alr_mutex);
lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD,
LAN9303_ALR_CMD_GET_FIRST);
lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0);
@@ -606,6 +602,7 @@ static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx)
LAN9303_ALR_CMD_GET_NEXT);
lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0);
}
+ mutex_unlock(&chip->alr_mutex);
}
static void alr_reg_to_mac(u32 dat0, u32 dat1, u8 mac[6])
@@ -694,16 +691,20 @@ static int lan9303_alr_add_port(struct lan9303 *chip, const u8 *mac, int port,
{
struct lan9303_alr_cache_entry *entr;
+ mutex_lock(&chip->alr_mutex);
entr = lan9303_alr_cache_find_mac(chip, mac);
if (!entr) { /*New entry */
entr = lan9303_alr_cache_find_free(chip);
- if (!entr)
+ if (!entr) {
+ mutex_unlock(&chip->alr_mutex);
return -ENOSPC;
+ }
ether_addr_copy(entr->mac_addr, mac);
}
entr->port_map |= BIT(port);
entr->stp_override = stp_override;
lan9303_alr_set_entry(chip, mac, entr->port_map, stp_override);
+ mutex_unlock(&chip->alr_mutex);
return 0;
}
@@ -713,15 +714,18 @@ static int lan9303_alr_del_port(struct lan9303 *chip, const u8 *mac, int port)
{
struct lan9303_alr_cache_entry *entr;
+ mutex_lock(&chip->alr_mutex);
entr = lan9303_alr_cache_find_mac(chip, mac);
if (!entr)
- return 0; /* no static entry found */
+ goto out; /* no static entry found */
entr->port_map &= ~BIT(port);
if (entr->port_map == 0) /* zero means its free again */
eth_zero_addr(entr->mac_addr);
lan9303_alr_set_entry(chip, mac, entr->port_map, entr->stp_override);
+out:
+ mutex_unlock(&chip->alr_mutex);
return 0;
}
@@ -1217,8 +1221,7 @@ static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port,
}
static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
struct lan9303 *chip = ds->priv;
@@ -1235,8 +1238,7 @@ static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port,
}
static void lan9303_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
struct lan9303 *chip = ds->priv;
@@ -1325,6 +1327,7 @@ int lan9303_probe(struct lan9303 *chip, struct device_node *np)
int ret;
mutex_init(&chip->indirect_mutex);
+ mutex_init(&chip->alr_mutex);
lan9303_probe_reset_gpio(chip, np);
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index b5be93a1e0df..663b0d5b982b 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -559,8 +559,7 @@ static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag)
}
static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
/* nothing needed */
@@ -568,8 +567,7 @@ static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
}
static void ksz_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
struct ksz_device *dev = ds->priv;
u32 vlan_table[3];
@@ -858,16 +856,14 @@ exit:
}
static int ksz_port_mdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
/* nothing to do */
return 0;
}
static void ksz_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
struct ksz_device *dev = ds->priv;
u32 static_table[4];
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 2820d69810b3..8a0bb000d056 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -805,6 +805,69 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
}
static void
+mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
+{
+ struct mt7530_priv *priv = ds->priv;
+ bool all_user_ports_removed = true;
+ int i;
+
+ /* When a port is removed from the bridge, the port would be set up
+ * back to the default as is at initial boot which is a VLAN-unaware
+ * port.
+ */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_MATRIX_MODE);
+ mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK,
+ VLAN_ATTR(MT7530_VLAN_TRANSPARENT));
+
+ priv->ports[port].vlan_filtering = false;
+
+ for (i = 0; i < MT7530_NUM_PORTS; i++) {
+ if (dsa_is_user_port(ds, i) &&
+ priv->ports[i].vlan_filtering) {
+ all_user_ports_removed = false;
+ break;
+ }
+ }
+
+ /* CPU port also does the same thing until all user ports belonging to
+ * the CPU port get out of VLAN filtering mode.
+ */
+ if (all_user_ports_removed) {
+ mt7530_write(priv, MT7530_PCR_P(MT7530_CPU_PORT),
+ PCR_MATRIX(dsa_user_ports(priv->ds)));
+ mt7530_write(priv, MT7530_PVC_P(MT7530_CPU_PORT),
+ PORT_SPEC_TAG);
+ }
+}
+
+static void
+mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ /* The real fabric path would be decided on the membership in the
+ * entry of VLAN table. PCR_MATRIX set up here with ALL_MEMBERS
+ * means potential VLAN can be consisting of certain subset of all
+ * ports.
+ */
+ mt7530_rmw(priv, MT7530_PCR_P(port),
+ PCR_MATRIX_MASK, PCR_MATRIX(MT7530_ALL_MEMBERS));
+
+ /* Trapped into security mode allows packet forwarding through VLAN
+ * table lookup.
+ */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_SECURITY_MODE);
+
+ /* Set the port as a user port which is to be able to recognize VID
+ * from incoming packets before fetching entry within the VLAN table.
+ */
+ mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK,
+ VLAN_ATTR(MT7530_VLAN_USER));
+}
+
+static void
mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
struct net_device *bridge)
{
@@ -817,8 +880,11 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
/* Remove this port from the port matrix of the other ports
* in the same bridge. If the port is disabled, port matrix
* is kept and not being setup until the port becomes enabled.
+ * And the other port's port matrix cannot be broken when the
+ * other port is still a VLAN-aware port.
*/
- if (dsa_is_user_port(ds, i) && i != port) {
+ if (!priv->ports[i].vlan_filtering &&
+ dsa_is_user_port(ds, i) && i != port) {
if (dsa_to_port(ds, i)->bridge_dev != bridge)
continue;
if (priv->ports[i].enable)
@@ -836,6 +902,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
PCR_MATRIX(BIT(MT7530_CPU_PORT)));
priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
+ mt7530_port_set_vlan_unaware(ds, port);
+
mutex_unlock(&priv->reg_mutex);
}
@@ -906,6 +974,220 @@ err:
return 0;
}
+static int
+mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid)
+{
+ struct mt7530_dummy_poll p;
+ u32 val;
+ int ret;
+
+ val = VTCR_BUSY | VTCR_FUNC(cmd) | vid;
+ mt7530_write(priv, MT7530_VTCR, val);
+
+ INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_VTCR);
+ ret = readx_poll_timeout(_mt7530_read, &p, val,
+ !(val & VTCR_BUSY), 20, 20000);
+ if (ret < 0) {
+ dev_err(priv->dev, "poll timeout\n");
+ return ret;
+ }
+
+ val = mt7530_read(priv, MT7530_VTCR);
+ if (val & VTCR_INVALID) {
+ dev_err(priv->dev, "read VTCR invalid\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+mt7530_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool vlan_filtering)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ priv->ports[port].vlan_filtering = vlan_filtering;
+
+ if (vlan_filtering) {
+ /* The port is being kept as VLAN-unaware port when bridge is
+ * set up with vlan_filtering not being set, Otherwise, the
+ * port and the corresponding CPU port is required the setup
+ * for becoming a VLAN-aware port.
+ */
+ mt7530_port_set_vlan_aware(ds, port);
+ mt7530_port_set_vlan_aware(ds, MT7530_CPU_PORT);
+ }
+
+ return 0;
+}
+
+static int
+mt7530_port_vlan_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ /* nothing needed */
+
+ return 0;
+}
+
+static void
+mt7530_hw_vlan_add(struct mt7530_priv *priv,
+ struct mt7530_hw_vlan_entry *entry)
+{
+ u8 new_members;
+ u32 val;
+
+ new_members = entry->old_members | BIT(entry->port) |
+ BIT(MT7530_CPU_PORT);
+
+ /* Validate the entry with independent learning, create egress tag per
+ * VLAN and joining the port as one of the port members.
+ */
+ val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID;
+ mt7530_write(priv, MT7530_VAWD1, val);
+
+ /* Decide whether adding tag or not for those outgoing packets from the
+ * port inside the VLAN.
+ */
+ val = entry->untagged ? MT7530_VLAN_EGRESS_UNTAG :
+ MT7530_VLAN_EGRESS_TAG;
+ mt7530_rmw(priv, MT7530_VAWD2,
+ ETAG_CTRL_P_MASK(entry->port),
+ ETAG_CTRL_P(entry->port, val));
+
+ /* CPU port is always taken as a tagged port for serving more than one
+ * VLANs across and also being applied with egress type stack mode for
+ * that VLAN tags would be appended after hardware special tag used as
+ * DSA tag.
+ */
+ mt7530_rmw(priv, MT7530_VAWD2,
+ ETAG_CTRL_P_MASK(MT7530_CPU_PORT),
+ ETAG_CTRL_P(MT7530_CPU_PORT,
+ MT7530_VLAN_EGRESS_STACK));
+}
+
+static void
+mt7530_hw_vlan_del(struct mt7530_priv *priv,
+ struct mt7530_hw_vlan_entry *entry)
+{
+ u8 new_members;
+ u32 val;
+
+ new_members = entry->old_members & ~BIT(entry->port);
+
+ val = mt7530_read(priv, MT7530_VAWD1);
+ if (!(val & VLAN_VALID)) {
+ dev_err(priv->dev,
+ "Cannot be deleted due to invalid entry\n");
+ return;
+ }
+
+ /* If certain member apart from CPU port is still alive in the VLAN,
+ * the entry would be kept valid. Otherwise, the entry is got to be
+ * disabled.
+ */
+ if (new_members && new_members != BIT(MT7530_CPU_PORT)) {
+ val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) |
+ VLAN_VALID;
+ mt7530_write(priv, MT7530_VAWD1, val);
+ } else {
+ mt7530_write(priv, MT7530_VAWD1, 0);
+ mt7530_write(priv, MT7530_VAWD2, 0);
+ }
+}
+
+static void
+mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid,
+ struct mt7530_hw_vlan_entry *entry,
+ mt7530_vlan_op vlan_op)
+{
+ u32 val;
+
+ /* Fetch entry */
+ mt7530_vlan_cmd(priv, MT7530_VTCR_RD_VID, vid);
+
+ val = mt7530_read(priv, MT7530_VAWD1);
+
+ entry->old_members = (val >> PORT_MEM_SHFT) & PORT_MEM_MASK;
+
+ /* Manipulate entry */
+ vlan_op(priv, entry);
+
+ /* Flush result to hardware */
+ mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, vid);
+}
+
+static void
+mt7530_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+ bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+ struct mt7530_hw_vlan_entry new_entry;
+ struct mt7530_priv *priv = ds->priv;
+ u16 vid;
+
+ /* The port is kept as VLAN-unaware if bridge with vlan_filtering not
+ * being set.
+ */
+ if (!priv->ports[port].vlan_filtering)
+ return;
+
+ mutex_lock(&priv->reg_mutex);
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+ mt7530_hw_vlan_entry_init(&new_entry, port, untagged);
+ mt7530_hw_vlan_update(priv, vid, &new_entry,
+ mt7530_hw_vlan_add);
+ }
+
+ if (pvid) {
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID(vlan->vid_end));
+ priv->ports[port].pvid = vlan->vid_end;
+ }
+
+ mutex_unlock(&priv->reg_mutex);
+}
+
+static int
+mt7530_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct mt7530_hw_vlan_entry target_entry;
+ struct mt7530_priv *priv = ds->priv;
+ u16 vid, pvid;
+
+ /* The port is kept as VLAN-unaware if bridge with vlan_filtering not
+ * being set.
+ */
+ if (!priv->ports[port].vlan_filtering)
+ return 0;
+
+ mutex_lock(&priv->reg_mutex);
+
+ pvid = priv->ports[port].pvid;
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+ mt7530_hw_vlan_entry_init(&target_entry, port, 0);
+ mt7530_hw_vlan_update(priv, vid, &target_entry,
+ mt7530_hw_vlan_del);
+
+ /* PVID is being restored to the default whenever the PVID port
+ * is being removed from the VLAN.
+ */
+ if (pvid == vid)
+ pvid = G0_PORT_VID_DEF;
+ }
+
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid);
+ priv->ports[port].pvid = pvid;
+
+ mutex_unlock(&priv->reg_mutex);
+
+ return 0;
+}
+
static enum dsa_tag_protocol
mtk_get_tag_protocol(struct dsa_switch *ds, int port)
{
@@ -1035,6 +1317,10 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
.port_fdb_add = mt7530_port_fdb_add,
.port_fdb_del = mt7530_port_fdb_del,
.port_fdb_dump = mt7530_port_fdb_dump,
+ .port_vlan_filtering = mt7530_port_vlan_filtering,
+ .port_vlan_prepare = mt7530_port_vlan_prepare,
+ .port_vlan_add = mt7530_port_vlan_add,
+ .port_vlan_del = mt7530_port_vlan_del,
};
static int
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 74db9822eb40..d9b407a22a58 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -17,6 +17,7 @@
#define MT7530_NUM_PORTS 7
#define MT7530_CPU_PORT 6
#define MT7530_NUM_FDB_RECORDS 2048
+#define MT7530_ALL_MEMBERS 0xff
#define NUM_TRGMII_CTRL 5
@@ -88,21 +89,42 @@ enum mt7530_fdb_cmd {
/* Register for vlan table control */
#define MT7530_VTCR 0x90
#define VTCR_BUSY BIT(31)
-#define VTCR_FUNC (((x) & 0xf) << 12)
-#define VTCR_FUNC_RD_VID 0x1
-#define VTCR_FUNC_WR_VID 0x2
-#define VTCR_FUNC_INV_VID 0x3
-#define VTCR_FUNC_VAL_VID 0x4
+#define VTCR_INVALID BIT(16)
+#define VTCR_FUNC(x) (((x) & 0xf) << 12)
#define VTCR_VID ((x) & 0xfff)
+enum mt7530_vlan_cmd {
+ /* Read/Write the specified VID entry from VAWD register based
+ * on VID.
+ */
+ MT7530_VTCR_RD_VID = 0,
+ MT7530_VTCR_WR_VID = 1,
+};
+
/* Register for setup vlan and acl write data */
#define MT7530_VAWD1 0x94
#define PORT_STAG BIT(31)
+/* Independent VLAN Learning */
#define IVL_MAC BIT(30)
+/* Per VLAN Egress Tag Control */
+#define VTAG_EN BIT(28)
+/* VLAN Member Control */
#define PORT_MEM(x) (((x) & 0xff) << 16)
-#define VALID BIT(1)
+/* VLAN Entry Valid */
+#define VLAN_VALID BIT(0)
+#define PORT_MEM_SHFT 16
+#define PORT_MEM_MASK 0xff
#define MT7530_VAWD2 0x98
+/* Egress Tag Control */
+#define ETAG_CTRL_P(p, x) (((x) & 0x3) << ((p) << 1))
+#define ETAG_CTRL_P_MASK(p) ETAG_CTRL_P(p, 3)
+
+enum mt7530_vlan_egress_attr {
+ MT7530_VLAN_EGRESS_UNTAG = 0,
+ MT7530_VLAN_EGRESS_TAG = 2,
+ MT7530_VLAN_EGRESS_STACK = 3,
+};
/* Register for port STP state control */
#define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100))
@@ -120,11 +142,23 @@ enum mt7530_stp_state {
/* Register for port control */
#define MT7530_PCR_P(x) (0x2004 + ((x) * 0x100))
#define PORT_VLAN(x) ((x) & 0x3)
+
+enum mt7530_port_mode {
+ /* Port Matrix Mode: Frames are forwarded by the PCR_MATRIX members. */
+ MT7530_PORT_MATRIX_MODE = PORT_VLAN(0),
+
+ /* Security Mode: Discard any frame due to ingress membership
+ * violation or VID missed on the VLAN table.
+ */
+ MT7530_PORT_SECURITY_MODE = PORT_VLAN(3),
+};
+
#define PCR_MATRIX(x) (((x) & 0xff) << 16)
#define PORT_PRI(x) (((x) & 0x7) << 24)
#define EG_TAG(x) (((x) & 0x3) << 28)
#define PCR_MATRIX_MASK PCR_MATRIX(0xff)
#define PCR_MATRIX_CLR PCR_MATRIX(0)
+#define PCR_PORT_VLAN_MASK PORT_VLAN(3)
/* Register for port security control */
#define MT7530_PSC_P(x) (0x200c + ((x) * 0x100))
@@ -134,10 +168,20 @@ enum mt7530_stp_state {
#define MT7530_PVC_P(x) (0x2010 + ((x) * 0x100))
#define PORT_SPEC_TAG BIT(5)
#define VLAN_ATTR(x) (((x) & 0x3) << 6)
+#define VLAN_ATTR_MASK VLAN_ATTR(3)
+
+enum mt7530_vlan_port_attr {
+ MT7530_VLAN_USER = 0,
+ MT7530_VLAN_TRANSPARENT = 3,
+};
+
#define STAG_VPID (((x) & 0xffff) << 16)
/* Register for port port-and-protocol based vlan 1 control */
#define MT7530_PPBV1_P(x) (0x2014 + ((x) * 0x100))
+#define G0_PORT_VID(x) (((x) & 0xfff) << 0)
+#define G0_PORT_VID_MASK G0_PORT_VID(0xfff)
+#define G0_PORT_VID_DEF G0_PORT_VID(1)
/* Register for port MAC control register */
#define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100))
@@ -345,9 +389,20 @@ struct mt7530_fdb {
bool noarp;
};
+/* struct mt7530_port - This is the main data structure for holding the state
+ * of the port.
+ * @enable: The status used for show port is enabled or not.
+ * @pm: The matrix used to show all connections with the port.
+ * @pvid: The VLAN specified is to be considered a PVID at ingress. Any
+ * untagged frames will be assigned to the related VLAN.
+ * @vlan_filtering: The flags indicating whether the port that can recognize
+ * VLAN-tagged frames.
+ */
struct mt7530_port {
bool enable;
u32 pm;
+ u16 pvid;
+ bool vlan_filtering;
};
/* struct mt7530_priv - This is the main data structure for holding the state
@@ -382,6 +437,22 @@ struct mt7530_priv {
struct mutex reg_mutex;
};
+struct mt7530_hw_vlan_entry {
+ int port;
+ u8 old_members;
+ bool untagged;
+};
+
+static inline void mt7530_hw_vlan_entry_init(struct mt7530_hw_vlan_entry *e,
+ int port, bool untagged)
+{
+ e->port = port;
+ e->untagged = untagged;
+}
+
+typedef void (*mt7530_vlan_op)(struct mt7530_priv *,
+ struct mt7530_hw_vlan_entry *);
+
struct mt7530_hw_stats {
const char *string;
u16 reg;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 66d33e97cbc5..fc512c98f2f8 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1185,8 +1185,7 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
static int
mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
struct mv88e6xxx_chip *chip = ds->priv;
int err;
@@ -1295,8 +1294,7 @@ static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port,
}
static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_vlan *vlan)
{
struct mv88e6xxx_chip *chip = ds->priv;
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
@@ -1725,9 +1723,11 @@ static int mv88e6xxx_setup_message_port(struct mv88e6xxx_chip *chip, int port)
static int mv88e6xxx_setup_egress_floods(struct mv88e6xxx_chip *chip, int port)
{
- bool flood = port == dsa_upstream_port(chip->ds);
+ struct dsa_switch *ds = chip->ds;
+ bool flood;
/* Upstream ports flood frames with unknown unicast or multicast DA */
+ flood = dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port);
if (chip->info->ops->port_set_egress_floods)
return chip->info->ops->port_set_egress_floods(chip, port,
flood, flood);
@@ -1744,6 +1744,39 @@ static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
return 0;
}
+static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
+{
+ struct dsa_switch *ds = chip->ds;
+ int upstream_port;
+ int err;
+
+ upstream_port = dsa_upstream_port(ds, port);
+ if (chip->info->ops->port_set_upstream_port) {
+ err = chip->info->ops->port_set_upstream_port(chip, port,
+ upstream_port);
+ if (err)
+ return err;
+ }
+
+ if (port == upstream_port) {
+ if (chip->info->ops->set_cpu_port) {
+ err = chip->info->ops->set_cpu_port(chip,
+ upstream_port);
+ if (err)
+ return err;
+ }
+
+ if (chip->info->ops->set_egress_port) {
+ err = chip->info->ops->set_egress_port(chip,
+ upstream_port);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
{
struct dsa_switch *ds = chip->ds;
@@ -1814,13 +1847,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
if (err)
return err;
- reg = 0;
- if (chip->info->ops->port_set_upstream_port) {
- err = chip->info->ops->port_set_upstream_port(
- chip, port, dsa_upstream_port(ds));
- if (err)
- return err;
- }
+ err = mv88e6xxx_setup_upstream_port(chip, port);
+ if (err)
+ return err;
err = mv88e6xxx_port_set_8021q_mode(chip, port,
MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED);
@@ -1946,21 +1975,8 @@ static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
{
struct dsa_switch *ds = chip->ds;
- u32 upstream_port = dsa_upstream_port(ds);
int err;
- if (chip->info->ops->set_cpu_port) {
- err = chip->info->ops->set_cpu_port(chip, upstream_port);
- if (err)
- return err;
- }
-
- if (chip->info->ops->set_egress_port) {
- err = chip->info->ops->set_egress_port(chip, upstream_port);
- if (err)
- return err;
- }
-
/* Disable remote management, and set the switch's DSA device number. */
err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL2,
MV88E6XXX_G1_CTL2_MULTIPLE_CASCADE |
@@ -3741,6 +3757,7 @@ static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds,
return chip->info->tag_protocol;
}
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
static const char *mv88e6xxx_drv_probe(struct device *dsa_dev,
struct device *host_dev, int sw_addr,
void **priv)
@@ -3788,10 +3805,10 @@ free:
return NULL;
}
+#endif
static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
/* We don't need any dynamic resource from the kernel (yet),
* so skip the prepare phase.
@@ -3801,8 +3818,7 @@ static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port,
}
static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
struct mv88e6xxx_chip *chip = ds->priv;
@@ -3829,7 +3845,9 @@ static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port,
}
static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
.probe = mv88e6xxx_drv_probe,
+#endif
.get_tag_protocol = mv88e6xxx_get_tag_protocol,
.setup = mv88e6xxx_setup,
.adjust_link = mv88e6xxx_adjust_link,
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 58483af80bdb..30b1c8512049 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -42,48 +42,7 @@
#define DRV_NAME "dummy"
#define DRV_VERSION "1.0"
-#undef pr_fmt
-#define pr_fmt(fmt) DRV_NAME ": " fmt
-
static int numdummies = 1;
-static int num_vfs;
-
-struct vf_data_storage {
- u8 vf_mac[ETH_ALEN];
- u16 pf_vlan; /* When set, guest VLAN config not allowed. */
- u16 pf_qos;
- __be16 vlan_proto;
- u16 min_tx_rate;
- u16 max_tx_rate;
- u8 spoofchk_enabled;
- bool rss_query_enabled;
- u8 trusted;
- int link_state;
-};
-
-struct dummy_priv {
- struct vf_data_storage *vfinfo;
-};
-
-static int dummy_num_vf(struct device *dev)
-{
- return num_vfs;
-}
-
-static struct bus_type dummy_bus = {
- .name = "dummy",
- .num_vf = dummy_num_vf,
-};
-
-static void release_dummy_parent(struct device *dev)
-{
-}
-
-static struct device dummy_parent = {
- .init_name = "dummy",
- .bus = &dummy_bus,
- .release = release_dummy_parent,
-};
/* fake multicast ability */
static void set_multicast_list(struct net_device *dev)
@@ -133,25 +92,10 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
static int dummy_dev_init(struct net_device *dev)
{
- struct dummy_priv *priv = netdev_priv(dev);
-
dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
if (!dev->dstats)
return -ENOMEM;
- priv->vfinfo = NULL;
-
- if (!num_vfs)
- return 0;
-
- dev->dev.parent = &dummy_parent;
- priv->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage),
- GFP_KERNEL);
- if (!priv->vfinfo) {
- free_percpu(dev->dstats);
- return -ENOMEM;
- }
-
return 0;
}
@@ -169,117 +113,6 @@ static int dummy_change_carrier(struct net_device *dev, bool new_carrier)
return 0;
}
-static int dummy_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (!is_valid_ether_addr(mac) || (vf >= num_vfs))
- return -EINVAL;
-
- memcpy(priv->vfinfo[vf].vf_mac, mac, ETH_ALEN);
-
- return 0;
-}
-
-static int dummy_set_vf_vlan(struct net_device *dev, int vf,
- u16 vlan, u8 qos, __be16 vlan_proto)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if ((vf >= num_vfs) || (vlan > 4095) || (qos > 7))
- return -EINVAL;
-
- priv->vfinfo[vf].pf_vlan = vlan;
- priv->vfinfo[vf].pf_qos = qos;
- priv->vfinfo[vf].vlan_proto = vlan_proto;
-
- return 0;
-}
-
-static int dummy_set_vf_rate(struct net_device *dev, int vf, int min, int max)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (vf >= num_vfs)
- return -EINVAL;
-
- priv->vfinfo[vf].min_tx_rate = min;
- priv->vfinfo[vf].max_tx_rate = max;
-
- return 0;
-}
-
-static int dummy_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (vf >= num_vfs)
- return -EINVAL;
-
- priv->vfinfo[vf].spoofchk_enabled = val;
-
- return 0;
-}
-
-static int dummy_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (vf >= num_vfs)
- return -EINVAL;
-
- priv->vfinfo[vf].rss_query_enabled = val;
-
- return 0;
-}
-
-static int dummy_set_vf_trust(struct net_device *dev, int vf, bool val)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (vf >= num_vfs)
- return -EINVAL;
-
- priv->vfinfo[vf].trusted = val;
-
- return 0;
-}
-
-static int dummy_get_vf_config(struct net_device *dev,
- int vf, struct ifla_vf_info *ivi)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (vf >= num_vfs)
- return -EINVAL;
-
- ivi->vf = vf;
- memcpy(&ivi->mac, priv->vfinfo[vf].vf_mac, ETH_ALEN);
- ivi->vlan = priv->vfinfo[vf].pf_vlan;
- ivi->qos = priv->vfinfo[vf].pf_qos;
- ivi->spoofchk = priv->vfinfo[vf].spoofchk_enabled;
- ivi->linkstate = priv->vfinfo[vf].link_state;
- ivi->min_tx_rate = priv->vfinfo[vf].min_tx_rate;
- ivi->max_tx_rate = priv->vfinfo[vf].max_tx_rate;
- ivi->rss_query_en = priv->vfinfo[vf].rss_query_enabled;
- ivi->trusted = priv->vfinfo[vf].trusted;
- ivi->vlan_proto = priv->vfinfo[vf].vlan_proto;
-
- return 0;
-}
-
-static int dummy_set_vf_link_state(struct net_device *dev, int vf, int state)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- if (vf >= num_vfs)
- return -EINVAL;
-
- priv->vfinfo[vf].link_state = state;
-
- return 0;
-}
-
static const struct net_device_ops dummy_netdev_ops = {
.ndo_init = dummy_dev_init,
.ndo_uninit = dummy_dev_uninit,
@@ -289,14 +122,6 @@ static const struct net_device_ops dummy_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_get_stats64 = dummy_get_stats64,
.ndo_change_carrier = dummy_change_carrier,
- .ndo_set_vf_mac = dummy_set_vf_mac,
- .ndo_set_vf_vlan = dummy_set_vf_vlan,
- .ndo_set_vf_rate = dummy_set_vf_rate,
- .ndo_set_vf_spoofchk = dummy_set_vf_spoofchk,
- .ndo_set_vf_trust = dummy_set_vf_trust,
- .ndo_get_vf_config = dummy_get_vf_config,
- .ndo_set_vf_link_state = dummy_set_vf_link_state,
- .ndo_set_vf_rss_query_en = dummy_set_vf_rss_query_en,
};
static void dummy_get_drvinfo(struct net_device *dev,
@@ -323,13 +148,6 @@ static const struct ethtool_ops dummy_ethtool_ops = {
.get_ts_info = dummy_get_ts_info,
};
-static void dummy_free_netdev(struct net_device *dev)
-{
- struct dummy_priv *priv = netdev_priv(dev);
-
- kfree(priv->vfinfo);
-}
-
static void dummy_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -338,7 +156,6 @@ static void dummy_setup(struct net_device *dev)
dev->netdev_ops = &dummy_netdev_ops;
dev->ethtool_ops = &dummy_ethtool_ops;
dev->needs_free_netdev = true;
- dev->priv_destructor = dummy_free_netdev;
/* Fill in device structure with ethernet-generic values. */
dev->flags |= IFF_NOARP;
@@ -370,7 +187,6 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[],
static struct rtnl_link_ops dummy_link_ops __read_mostly = {
.kind = DRV_NAME,
- .priv_size = sizeof(struct dummy_priv),
.setup = dummy_setup,
.validate = dummy_validate,
};
@@ -379,16 +195,12 @@ static struct rtnl_link_ops dummy_link_ops __read_mostly = {
module_param(numdummies, int, 0);
MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
-module_param(num_vfs, int, 0);
-MODULE_PARM_DESC(num_vfs, "Number of dummy VFs per dummy device");
-
static int __init dummy_init_one(void)
{
struct net_device *dev_dummy;
int err;
- dev_dummy = alloc_netdev(sizeof(struct dummy_priv),
- "dummy%d", NET_NAME_ENUM, dummy_setup);
+ dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_ENUM, dummy_setup);
if (!dev_dummy)
return -ENOMEM;
@@ -407,21 +219,6 @@ static int __init dummy_init_module(void)
{
int i, err = 0;
- if (num_vfs) {
- err = bus_register(&dummy_bus);
- if (err < 0) {
- pr_err("registering dummy bus failed\n");
- return err;
- }
-
- err = device_register(&dummy_parent);
- if (err < 0) {
- pr_err("registering dummy parent device failed\n");
- bus_unregister(&dummy_bus);
- return err;
- }
- }
-
rtnl_lock();
err = __rtnl_link_register(&dummy_link_ops);
if (err < 0)
@@ -437,22 +234,12 @@ static int __init dummy_init_module(void)
out:
rtnl_unlock();
- if (err && num_vfs) {
- device_unregister(&dummy_parent);
- bus_unregister(&dummy_bus);
- }
-
return err;
}
static void __exit dummy_cleanup_module(void)
{
rtnl_link_unregister(&dummy_link_ops);
-
- if (num_vfs) {
- device_unregister(&dummy_parent);
- bus_unregister(&dummy_bus);
- }
}
module_init(dummy_init_module);
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index a1a52eb53b14..8f71b79b4949 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -1436,13 +1436,13 @@ static int ace_init(struct net_device *dev)
ace_set_txprd(regs, ap, 0);
writel(0, &regs->RxRetCsm);
- /*
- * Enable DMA engine now.
- * If we do this sooner, Mckinley box pukes.
- * I assume it's because Tigon II DMA engine wants to check
- * *something* even before the CPU is started.
- */
- writel(1, &regs->AssistState); /* enable DMA */
+ /*
+ * Enable DMA engine now.
+ * If we do this sooner, Mckinley box pukes.
+ * I assume it's because Tigon II DMA engine wants to check
+ * *something* even before the CPU is started.
+ */
+ writel(1, &regs->AssistState); /* enable DMA */
/*
* Start the NIC CPU
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 61ca4eb7c6fa..1d865ae201db 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1706,12 +1706,16 @@ static int bnxt_async_event_process(struct bnxt *bp,
if (BNXT_VF(bp))
goto async_event_process_exit;
- if (data1 & 0x20000) {
+
+ /* print unsupported speed warning in forced speed mode only */
+ if (!(link_info->autoneg & BNXT_AUTONEG_SPEED) &&
+ (data1 & 0x20000)) {
u16 fw_speed = link_info->force_link_speed;
u32 speed = bnxt_fw_to_ethtool_speed(fw_speed);
- netdev_warn(bp->dev, "Link speed %d no longer supported\n",
- speed);
+ if (speed != SPEED_UNKNOWN)
+ netdev_warn(bp->dev, "Link speed %d no longer supported\n",
+ speed);
}
set_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event);
/* fall thru */
@@ -7800,8 +7804,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bnxt_dcb_free(bp);
kfree(bp->edev);
bp->edev = NULL;
- if (bp->xdp_prog)
- bpf_prog_put(bp->xdp_prog);
bnxt_cleanup_pci(bp);
free_netdev(dev);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index b13ce5ebde8d..fe7599f404bf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1376,6 +1376,9 @@ static int bnxt_firmware_reset(struct net_device *dev,
req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
break;
+ case BNXT_FW_RESET_AP:
+ req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP;
+ break;
default:
return -EINVAL;
}
@@ -2522,6 +2525,14 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP);
if (!rc)
netdev_info(dev, "Reset request successful. Reload driver to complete reset\n");
+ } else if (*flags == ETH_RESET_AP) {
+ /* This feature is not supported in older firmware versions */
+ if (bp->hwrm_spec_code < 0x10803)
+ return -EOPNOTSUPP;
+
+ rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_AP);
+ if (!rc)
+ netdev_info(dev, "Reset Application Processor request successful.\n");
} else {
rc = -EINVAL;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index ff601b42fcc8..836ef682f24c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -34,6 +34,7 @@ struct bnxt_led_cfg {
#define BNXT_LED_DFLT_ENABLES(x) \
cpu_to_le32(BNXT_LED_DFLT_ENA << (BNXT_LED_DFLT_ENA_SHIFT * (x)))
+#define BNXT_FW_RESET_AP 0xfffe
#define BNXT_FW_RESET_CHIP 0xffff
extern const struct ethtool_ops bnxt_ethtool_ops;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 3d201d7324bd..2ae5ed151369 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -54,12 +54,10 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
struct bnxt_tc_actions *actions,
const struct tc_action *tc_act)
{
- int ifindex = tcf_mirred_ifindex(tc_act);
- struct net_device *dev;
+ struct net_device *dev = tcf_mirred_dev(tc_act);
- dev = __dev_get_by_index(dev_net(bp->dev), ifindex);
if (!dev) {
- netdev_info(bp->dev, "no dev for ifindex=%d", ifindex);
+ netdev_info(bp->dev, "no dev in mirred action");
return -EINVAL;
}
@@ -148,9 +146,6 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
}
}
- if (rc)
- return rc;
-
if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
/* dst_fid is PF's fid */
@@ -164,7 +159,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
}
}
- return rc;
+ return 0;
}
#define GET_KEY(flow_cmd, key_type) \
@@ -1417,11 +1412,7 @@ bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
void *flow_node;
int rc, i;
- rc = rhashtable_walk_start(iter);
- if (rc && rc != -EAGAIN) {
- i = 0;
- goto done;
- }
+ rhashtable_walk_start(iter);
rc = 0;
for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index c93f3a2dc6c1..c50c5ec49b1d 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -164,14 +164,38 @@
#define GEM_DCFG5 0x0290 /* Design Config 5 */
#define GEM_DCFG6 0x0294 /* Design Config 6 */
#define GEM_DCFG7 0x0298 /* Design Config 7 */
+#define GEM_DCFG8 0x029C /* Design Config 8 */
#define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */
#define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */
+/* Screener Type 2 match registers */
+#define GEM_SCRT2 0x540
+
+/* EtherType registers */
+#define GEM_ETHT 0x06E0
+
+/* Type 2 compare registers */
+#define GEM_T2CMPW0 0x0700
+#define GEM_T2CMPW1 0x0704
+#define T2CMP_OFST(t2idx) (t2idx * 2)
+
+/* type 2 compare registers
+ * each location requires 3 compare regs
+ */
+#define GEM_IP4SRC_CMP(idx) (idx * 3)
+#define GEM_IP4DST_CMP(idx) (idx * 3 + 1)
+#define GEM_PORT_CMP(idx) (idx * 3 + 2)
+
+/* Which screening type 2 EtherType register will be used (0 - 7) */
+#define SCRT2_ETHT 0
+
#define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2))
#define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2))
#define GEM_TBQPH(hw_q) (0x04C8)
#define GEM_RBQP(hw_q) (0x0480 + ((hw_q) << 2))
+#define GEM_RBQS(hw_q) (0x04A0 + ((hw_q) << 2))
+#define GEM_RBQPH(hw_q) (0x04D4)
#define GEM_IER(hw_q) (0x0600 + ((hw_q) << 2))
#define GEM_IDR(hw_q) (0x0620 + ((hw_q) << 2))
#define GEM_IMR(hw_q) (0x0640 + ((hw_q) << 2))
@@ -455,6 +479,16 @@
#define GEM_DAW64_OFFSET 23
#define GEM_DAW64_SIZE 1
+/* Bitfields in DCFG8. */
+#define GEM_T1SCR_OFFSET 24
+#define GEM_T1SCR_SIZE 8
+#define GEM_T2SCR_OFFSET 16
+#define GEM_T2SCR_SIZE 8
+#define GEM_SCR2ETH_OFFSET 8
+#define GEM_SCR2ETH_SIZE 8
+#define GEM_SCR2CMP_OFFSET 0
+#define GEM_SCR2CMP_SIZE 8
+
/* Bitfields in TISUBN */
#define GEM_SUBNSINCR_OFFSET 0
#define GEM_SUBNSINCR_SIZE 16
@@ -483,6 +517,66 @@
#define GEM_RXTSMODE_OFFSET 4 /* RX Descriptor Timestamp Insertion mode */
#define GEM_RXTSMODE_SIZE 2
+/* Bitfields in SCRT2 */
+#define GEM_QUEUE_OFFSET 0 /* Queue Number */
+#define GEM_QUEUE_SIZE 4
+#define GEM_VLANPR_OFFSET 4 /* VLAN Priority */
+#define GEM_VLANPR_SIZE 3
+#define GEM_VLANEN_OFFSET 8 /* VLAN Enable */
+#define GEM_VLANEN_SIZE 1
+#define GEM_ETHT2IDX_OFFSET 9 /* Index to screener type 2 EtherType register */
+#define GEM_ETHT2IDX_SIZE 3
+#define GEM_ETHTEN_OFFSET 12 /* EtherType Enable */
+#define GEM_ETHTEN_SIZE 1
+#define GEM_CMPA_OFFSET 13 /* Compare A - Index to screener type 2 Compare register */
+#define GEM_CMPA_SIZE 5
+#define GEM_CMPAEN_OFFSET 18 /* Compare A Enable */
+#define GEM_CMPAEN_SIZE 1
+#define GEM_CMPB_OFFSET 19 /* Compare B - Index to screener type 2 Compare register */
+#define GEM_CMPB_SIZE 5
+#define GEM_CMPBEN_OFFSET 24 /* Compare B Enable */
+#define GEM_CMPBEN_SIZE 1
+#define GEM_CMPC_OFFSET 25 /* Compare C - Index to screener type 2 Compare register */
+#define GEM_CMPC_SIZE 5
+#define GEM_CMPCEN_OFFSET 30 /* Compare C Enable */
+#define GEM_CMPCEN_SIZE 1
+
+/* Bitfields in ETHT */
+#define GEM_ETHTCMP_OFFSET 0 /* EtherType compare value */
+#define GEM_ETHTCMP_SIZE 16
+
+/* Bitfields in T2CMPW0 */
+#define GEM_T2CMP_OFFSET 16 /* 0xFFFF0000 compare value */
+#define GEM_T2CMP_SIZE 16
+#define GEM_T2MASK_OFFSET 0 /* 0x0000FFFF compare value or mask */
+#define GEM_T2MASK_SIZE 16
+
+/* Bitfields in T2CMPW1 */
+#define GEM_T2DISMSK_OFFSET 9 /* disable mask */
+#define GEM_T2DISMSK_SIZE 1
+#define GEM_T2CMPOFST_OFFSET 7 /* compare offset */
+#define GEM_T2CMPOFST_SIZE 2
+#define GEM_T2OFST_OFFSET 0 /* offset value */
+#define GEM_T2OFST_SIZE 7
+
+/* Offset for screener type 2 compare values (T2CMPOFST).
+ * Note the offset is applied after the specified point,
+ * e.g. GEM_T2COMPOFST_ETYPE denotes the EtherType field, so an offset
+ * of 12 bytes from this would be the source IP address in an IP header
+ */
+#define GEM_T2COMPOFST_SOF 0
+#define GEM_T2COMPOFST_ETYPE 1
+#define GEM_T2COMPOFST_IPHDR 2
+#define GEM_T2COMPOFST_TCPUDP 3
+
+/* offset from EtherType to IP address */
+#define ETYPE_SRCIP_OFFSET 12
+#define ETYPE_DSTIP_OFFSET 16
+
+/* offset from IP header to port */
+#define IPHDR_SRCPORT_OFFSET 0
+#define IPHDR_DSTPORT_OFFSET 2
+
/* Transmit DMA buffer descriptor Word 1 */
#define GEM_DMA_TXVALID_OFFSET 23 /* timestamp has been captured in the Buffer Descriptor */
#define GEM_DMA_TXVALID_SIZE 1
@@ -583,6 +677,8 @@
#define gem_writel(port, reg, value) (port)->macb_reg_writel((port), GEM_##reg, (value))
#define queue_readl(queue, reg) (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg)
#define queue_writel(queue, reg, value) (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value))
+#define gem_readl_n(port, reg, idx) (port)->macb_reg_readl((port), GEM_##reg + idx * 4)
+#define gem_writel_n(port, reg, idx, value) (port)->macb_reg_writel((port), GEM_##reg + idx * 4, (value))
#define PTP_TS_BUFFER_SIZE 128 /* must be power of 2 */
@@ -920,13 +1016,42 @@ static const struct gem_statistic gem_statistics[] = {
#define GEM_STATS_LEN ARRAY_SIZE(gem_statistics)
+#define QUEUE_STAT_TITLE(title) { \
+ .stat_string = title, \
+}
+
+/* per queue statistics, each should be unsigned long type */
+struct queue_stats {
+ union {
+ unsigned long first;
+ unsigned long rx_packets;
+ };
+ unsigned long rx_bytes;
+ unsigned long rx_dropped;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+ unsigned long tx_dropped;
+};
+
+static const struct gem_statistic queue_statistics[] = {
+ QUEUE_STAT_TITLE("rx_packets"),
+ QUEUE_STAT_TITLE("rx_bytes"),
+ QUEUE_STAT_TITLE("rx_dropped"),
+ QUEUE_STAT_TITLE("tx_packets"),
+ QUEUE_STAT_TITLE("tx_bytes"),
+ QUEUE_STAT_TITLE("tx_dropped"),
+};
+
+#define QUEUE_STATS_LEN ARRAY_SIZE(queue_statistics)
+
struct macb;
+struct macb_queue;
struct macb_or_gem_ops {
int (*mog_alloc_rx_buffers)(struct macb *bp);
void (*mog_free_rx_buffers)(struct macb *bp);
void (*mog_init_rings)(struct macb *bp);
- int (*mog_rx)(struct macb *bp, int budget);
+ int (*mog_rx)(struct macb_queue *queue, int budget);
};
/* MACB-PTP interface: adapt to platform needs. */
@@ -968,6 +1093,9 @@ struct macb_queue {
unsigned int IMR;
unsigned int TBQP;
unsigned int TBQPH;
+ unsigned int RBQS;
+ unsigned int RBQP;
+ unsigned int RBQPH;
unsigned int tx_head, tx_tail;
struct macb_dma_desc *tx_ring;
@@ -975,6 +1103,16 @@ struct macb_queue {
dma_addr_t tx_ring_dma;
struct work_struct tx_error_task;
+ dma_addr_t rx_ring_dma;
+ dma_addr_t rx_buffers_dma;
+ unsigned int rx_tail;
+ unsigned int rx_prepared_head;
+ struct macb_dma_desc *rx_ring;
+ struct sk_buff **rx_skbuff;
+ void *rx_buffers;
+ struct napi_struct napi;
+ struct queue_stats stats;
+
#ifdef CONFIG_MACB_USE_HWSTAMP
struct work_struct tx_ts_task;
unsigned int tx_ts_head, tx_ts_tail;
@@ -982,6 +1120,16 @@ struct macb_queue {
#endif
};
+struct ethtool_rx_fs_item {
+ struct ethtool_rx_flow_spec fs;
+ struct list_head list;
+};
+
+struct ethtool_rx_fs_list {
+ struct list_head list;
+ unsigned int count;
+};
+
struct macb {
void __iomem *regs;
bool native_io;
@@ -990,11 +1138,6 @@ struct macb {
u32 (*macb_reg_readl)(struct macb *bp, int offset);
void (*macb_reg_writel)(struct macb *bp, int offset, u32 value);
- unsigned int rx_tail;
- unsigned int rx_prepared_head;
- struct macb_dma_desc *rx_ring;
- struct sk_buff **rx_skbuff;
- void *rx_buffers;
size_t rx_buffer_size;
unsigned int rx_ring_size;
@@ -1011,15 +1154,11 @@ struct macb {
struct clk *tx_clk;
struct clk *rx_clk;
struct net_device *dev;
- struct napi_struct napi;
union {
struct macb_stats macb;
struct gem_stats gem;
} hw_stats;
- dma_addr_t rx_ring_dma;
- dma_addr_t rx_buffers_dma;
-
struct macb_or_gem_ops macbgem_ops;
struct mii_bus *mii_bus;
@@ -1032,7 +1171,6 @@ struct macb {
unsigned int dma_burst_length;
phy_interface_t phy_interface;
- struct gpio_desc *reset_gpio;
/* AT91RM9200 transmit */
struct sk_buff *skb; /* holds skb until xmit interrupt completes */
@@ -1040,7 +1178,7 @@ struct macb {
int skb_length; /* saved skb length for pci_unmap_single */
unsigned int max_tx_length;
- u64 ethtool_stats[GEM_STATS_LEN];
+ u64 ethtool_stats[GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES];
unsigned int rx_frm_len_mask;
unsigned int jumbo_max_len;
@@ -1057,6 +1195,11 @@ struct macb {
struct ptp_clock_info ptp_clock_info;
struct tsu_incr tsu_incr;
struct hwtstamp_config tstamp_config;
+
+ /* RX queue filer rule set*/
+ struct ethtool_rx_fs_list rx_fs_list;
+ spinlock_t rx_fs_lock;
+ unsigned int max_tuples;
};
#ifdef CONFIG_MACB_USE_HWSTAMP
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 72a67f74b97b..234667eaaa92 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -194,17 +194,17 @@ static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index)
return index & (bp->rx_ring_size - 1);
}
-static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index)
+static struct macb_dma_desc *macb_rx_desc(struct macb_queue *queue, unsigned int index)
{
- index = macb_rx_ring_wrap(bp, index);
- index = macb_adj_dma_desc_idx(bp, index);
- return &bp->rx_ring[index];
+ index = macb_rx_ring_wrap(queue->bp, index);
+ index = macb_adj_dma_desc_idx(queue->bp, index);
+ return &queue->rx_ring[index];
}
-static void *macb_rx_buffer(struct macb *bp, unsigned int index)
+static void *macb_rx_buffer(struct macb_queue *queue, unsigned int index)
{
- return bp->rx_buffers + bp->rx_buffer_size *
- macb_rx_ring_wrap(bp, index);
+ return queue->rx_buffers + queue->bp->rx_buffer_size *
+ macb_rx_ring_wrap(queue->bp, index);
}
/* I/O accessors */
@@ -759,7 +759,9 @@ static void macb_tx_error_task(struct work_struct *work)
macb_tx_ring_wrap(bp, tail),
skb->data);
bp->dev->stats.tx_packets++;
+ queue->stats.tx_packets++;
bp->dev->stats.tx_bytes += skb->len;
+ queue->stats.tx_bytes += skb->len;
}
} else {
/* "Buffers exhausted mid-frame" errors may only happen
@@ -859,7 +861,9 @@ static void macb_tx_interrupt(struct macb_queue *queue)
macb_tx_ring_wrap(bp, tail),
skb->data);
bp->dev->stats.tx_packets++;
+ queue->stats.tx_packets++;
bp->dev->stats.tx_bytes += skb->len;
+ queue->stats.tx_bytes += skb->len;
}
/* Now we can safely release resources */
@@ -881,24 +885,25 @@ static void macb_tx_interrupt(struct macb_queue *queue)
netif_wake_subqueue(bp->dev, queue_index);
}
-static void gem_rx_refill(struct macb *bp)
+static void gem_rx_refill(struct macb_queue *queue)
{
unsigned int entry;
struct sk_buff *skb;
dma_addr_t paddr;
+ struct macb *bp = queue->bp;
struct macb_dma_desc *desc;
- while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail,
- bp->rx_ring_size) > 0) {
- entry = macb_rx_ring_wrap(bp, bp->rx_prepared_head);
+ while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail,
+ bp->rx_ring_size) > 0) {
+ entry = macb_rx_ring_wrap(bp, queue->rx_prepared_head);
/* Make hw descriptor updates visible to CPU */
rmb();
- bp->rx_prepared_head++;
- desc = macb_rx_desc(bp, entry);
+ queue->rx_prepared_head++;
+ desc = macb_rx_desc(queue, entry);
- if (!bp->rx_skbuff[entry]) {
+ if (!queue->rx_skbuff[entry]) {
/* allocate sk_buff for this free entry in ring */
skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size);
if (unlikely(!skb)) {
@@ -916,7 +921,7 @@ static void gem_rx_refill(struct macb *bp)
break;
}
- bp->rx_skbuff[entry] = skb;
+ queue->rx_skbuff[entry] = skb;
if (entry == bp->rx_ring_size - 1)
paddr |= MACB_BIT(RX_WRAP);
@@ -934,18 +939,18 @@ static void gem_rx_refill(struct macb *bp)
/* Make descriptor updates visible to hardware */
wmb();
- netdev_vdbg(bp->dev, "rx ring: prepared head %d, tail %d\n",
- bp->rx_prepared_head, bp->rx_tail);
+ netdev_vdbg(bp->dev, "rx ring: queue: %p, prepared head %d, tail %d\n",
+ queue, queue->rx_prepared_head, queue->rx_tail);
}
/* Mark DMA descriptors from begin up to and not including end as unused */
-static void discard_partial_frame(struct macb *bp, unsigned int begin,
+static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
unsigned int end)
{
unsigned int frag;
for (frag = begin; frag != end; frag++) {
- struct macb_dma_desc *desc = macb_rx_desc(bp, frag);
+ struct macb_dma_desc *desc = macb_rx_desc(queue, frag);
desc->addr &= ~MACB_BIT(RX_USED);
}
@@ -959,8 +964,9 @@ static void discard_partial_frame(struct macb *bp, unsigned int begin,
*/
}
-static int gem_rx(struct macb *bp, int budget)
+static int gem_rx(struct macb_queue *queue, int budget)
{
+ struct macb *bp = queue->bp;
unsigned int len;
unsigned int entry;
struct sk_buff *skb;
@@ -972,8 +978,8 @@ static int gem_rx(struct macb *bp, int budget)
dma_addr_t addr;
bool rxused;
- entry = macb_rx_ring_wrap(bp, bp->rx_tail);
- desc = macb_rx_desc(bp, entry);
+ entry = macb_rx_ring_wrap(bp, queue->rx_tail);
+ desc = macb_rx_desc(queue, entry);
/* Make hw descriptor updates visible to CPU */
rmb();
@@ -985,24 +991,26 @@ static int gem_rx(struct macb *bp, int budget)
if (!rxused)
break;
- bp->rx_tail++;
+ queue->rx_tail++;
count++;
if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
netdev_err(bp->dev,
"not whole frame pointed by descriptor\n");
bp->dev->stats.rx_dropped++;
+ queue->stats.rx_dropped++;
break;
}
- skb = bp->rx_skbuff[entry];
+ skb = queue->rx_skbuff[entry];
if (unlikely(!skb)) {
netdev_err(bp->dev,
"inconsistent Rx descriptor chain\n");
bp->dev->stats.rx_dropped++;
+ queue->stats.rx_dropped++;
break;
}
/* now everything is ready for receiving packet */
- bp->rx_skbuff[entry] = NULL;
+ queue->rx_skbuff[entry] = NULL;
len = ctrl & bp->rx_frm_len_mask;
netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
@@ -1019,7 +1027,9 @@ static int gem_rx(struct macb *bp, int budget)
skb->ip_summed = CHECKSUM_UNNECESSARY;
bp->dev->stats.rx_packets++;
+ queue->stats.rx_packets++;
bp->dev->stats.rx_bytes += skb->len;
+ queue->stats.rx_bytes += skb->len;
gem_ptp_do_rxstamp(bp, skb, desc);
@@ -1035,12 +1045,12 @@ static int gem_rx(struct macb *bp, int budget)
netif_receive_skb(skb);
}
- gem_rx_refill(bp);
+ gem_rx_refill(queue);
return count;
}
-static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
+static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag,
unsigned int last_frag)
{
unsigned int len;
@@ -1048,8 +1058,9 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
unsigned int offset;
struct sk_buff *skb;
struct macb_dma_desc *desc;
+ struct macb *bp = queue->bp;
- desc = macb_rx_desc(bp, last_frag);
+ desc = macb_rx_desc(queue, last_frag);
len = desc->ctrl & bp->rx_frm_len_mask;
netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
@@ -1068,7 +1079,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
if (!skb) {
bp->dev->stats.rx_dropped++;
for (frag = first_frag; ; frag++) {
- desc = macb_rx_desc(bp, frag);
+ desc = macb_rx_desc(queue, frag);
desc->addr &= ~MACB_BIT(RX_USED);
if (frag == last_frag)
break;
@@ -1096,10 +1107,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
frag_len = len - offset;
}
skb_copy_to_linear_data_offset(skb, offset,
- macb_rx_buffer(bp, frag),
+ macb_rx_buffer(queue, frag),
frag_len);
offset += bp->rx_buffer_size;
- desc = macb_rx_desc(bp, frag);
+ desc = macb_rx_desc(queue, frag);
desc->addr &= ~MACB_BIT(RX_USED);
if (frag == last_frag)
@@ -1121,32 +1132,34 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
return 0;
}
-static inline void macb_init_rx_ring(struct macb *bp)
+static inline void macb_init_rx_ring(struct macb_queue *queue)
{
+ struct macb *bp = queue->bp;
dma_addr_t addr;
struct macb_dma_desc *desc = NULL;
int i;
- addr = bp->rx_buffers_dma;
+ addr = queue->rx_buffers_dma;
for (i = 0; i < bp->rx_ring_size; i++) {
- desc = macb_rx_desc(bp, i);
+ desc = macb_rx_desc(queue, i);
macb_set_addr(bp, desc, addr);
desc->ctrl = 0;
addr += bp->rx_buffer_size;
}
desc->addr |= MACB_BIT(RX_WRAP);
- bp->rx_tail = 0;
+ queue->rx_tail = 0;
}
-static int macb_rx(struct macb *bp, int budget)
+static int macb_rx(struct macb_queue *queue, int budget)
{
+ struct macb *bp = queue->bp;
bool reset_rx_queue = false;
int received = 0;
unsigned int tail;
int first_frag = -1;
- for (tail = bp->rx_tail; budget > 0; tail++) {
- struct macb_dma_desc *desc = macb_rx_desc(bp, tail);
+ for (tail = queue->rx_tail; budget > 0; tail++) {
+ struct macb_dma_desc *desc = macb_rx_desc(queue, tail);
u32 ctrl;
/* Make hw descriptor updates visible to CPU */
@@ -1159,7 +1172,7 @@ static int macb_rx(struct macb *bp, int budget)
if (ctrl & MACB_BIT(RX_SOF)) {
if (first_frag != -1)
- discard_partial_frame(bp, first_frag, tail);
+ discard_partial_frame(queue, first_frag, tail);
first_frag = tail;
}
@@ -1171,7 +1184,7 @@ static int macb_rx(struct macb *bp, int budget)
continue;
}
- dropped = macb_rx_frame(bp, first_frag, tail);
+ dropped = macb_rx_frame(queue, first_frag, tail);
first_frag = -1;
if (unlikely(dropped < 0)) {
reset_rx_queue = true;
@@ -1195,8 +1208,8 @@ static int macb_rx(struct macb *bp, int budget)
ctrl = macb_readl(bp, NCR);
macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
- macb_init_rx_ring(bp);
- macb_writel(bp, RBQP, bp->rx_ring_dma);
+ macb_init_rx_ring(queue);
+ queue_writel(queue, RBQP, queue->rx_ring_dma);
macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
@@ -1205,16 +1218,17 @@ static int macb_rx(struct macb *bp, int budget)
}
if (first_frag != -1)
- bp->rx_tail = first_frag;
+ queue->rx_tail = first_frag;
else
- bp->rx_tail = tail;
+ queue->rx_tail = tail;
return received;
}
static int macb_poll(struct napi_struct *napi, int budget)
{
- struct macb *bp = container_of(napi, struct macb, napi);
+ struct macb_queue *queue = container_of(napi, struct macb_queue, napi);
+ struct macb *bp = queue->bp;
int work_done;
u32 status;
@@ -1224,7 +1238,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
(unsigned long)status, budget);
- work_done = bp->macbgem_ops.mog_rx(bp, budget);
+ work_done = bp->macbgem_ops.mog_rx(queue, budget);
if (work_done < budget) {
napi_complete_done(napi, work_done);
@@ -1232,10 +1246,10 @@ static int macb_poll(struct napi_struct *napi, int budget)
status = macb_readl(bp, RSR);
if (status) {
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
- macb_writel(bp, ISR, MACB_BIT(RCOMP));
+ queue_writel(queue, ISR, MACB_BIT(RCOMP));
napi_reschedule(napi);
} else {
- macb_writel(bp, IER, MACB_RX_INT_FLAGS);
+ queue_writel(queue, IER, MACB_RX_INT_FLAGS);
}
}
@@ -1282,9 +1296,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(RCOMP));
- if (napi_schedule_prep(&bp->napi)) {
+ if (napi_schedule_prep(&queue->napi)) {
netdev_vdbg(bp->dev, "scheduling RX softirq\n");
- __napi_schedule(&bp->napi);
+ __napi_schedule(&queue->napi);
}
}
@@ -1708,38 +1722,44 @@ static void gem_free_rx_buffers(struct macb *bp)
{
struct sk_buff *skb;
struct macb_dma_desc *desc;
+ struct macb_queue *queue;
dma_addr_t addr;
+ unsigned int q;
int i;
- if (!bp->rx_skbuff)
- return;
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ if (!queue->rx_skbuff)
+ continue;
- for (i = 0; i < bp->rx_ring_size; i++) {
- skb = bp->rx_skbuff[i];
+ for (i = 0; i < bp->rx_ring_size; i++) {
+ skb = queue->rx_skbuff[i];
- if (!skb)
- continue;
+ if (!skb)
+ continue;
- desc = macb_rx_desc(bp, i);
- addr = macb_get_addr(bp, desc);
+ desc = macb_rx_desc(queue, i);
+ addr = macb_get_addr(bp, desc);
- dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
- DMA_FROM_DEVICE);
- dev_kfree_skb_any(skb);
- skb = NULL;
- }
+ dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
+ DMA_FROM_DEVICE);
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ }
- kfree(bp->rx_skbuff);
- bp->rx_skbuff = NULL;
+ kfree(queue->rx_skbuff);
+ queue->rx_skbuff = NULL;
+ }
}
static void macb_free_rx_buffers(struct macb *bp)
{
- if (bp->rx_buffers) {
+ struct macb_queue *queue = &bp->queues[0];
+
+ if (queue->rx_buffers) {
dma_free_coherent(&bp->pdev->dev,
bp->rx_ring_size * bp->rx_buffer_size,
- bp->rx_buffers, bp->rx_buffers_dma);
- bp->rx_buffers = NULL;
+ queue->rx_buffers, queue->rx_buffers_dma);
+ queue->rx_buffers = NULL;
}
}
@@ -1748,11 +1768,12 @@ static void macb_free_consistent(struct macb *bp)
struct macb_queue *queue;
unsigned int q;
+ queue = &bp->queues[0];
bp->macbgem_ops.mog_free_rx_buffers(bp);
- if (bp->rx_ring) {
+ if (queue->rx_ring) {
dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES(bp),
- bp->rx_ring, bp->rx_ring_dma);
- bp->rx_ring = NULL;
+ queue->rx_ring, queue->rx_ring_dma);
+ queue->rx_ring = NULL;
}
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -1768,32 +1789,37 @@ static void macb_free_consistent(struct macb *bp)
static int gem_alloc_rx_buffers(struct macb *bp)
{
+ struct macb_queue *queue;
+ unsigned int q;
int size;
- size = bp->rx_ring_size * sizeof(struct sk_buff *);
- bp->rx_skbuff = kzalloc(size, GFP_KERNEL);
- if (!bp->rx_skbuff)
- return -ENOMEM;
- else
- netdev_dbg(bp->dev,
- "Allocated %d RX struct sk_buff entries at %p\n",
- bp->rx_ring_size, bp->rx_skbuff);
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ size = bp->rx_ring_size * sizeof(struct sk_buff *);
+ queue->rx_skbuff = kzalloc(size, GFP_KERNEL);
+ if (!queue->rx_skbuff)
+ return -ENOMEM;
+ else
+ netdev_dbg(bp->dev,
+ "Allocated %d RX struct sk_buff entries at %p\n",
+ bp->rx_ring_size, queue->rx_skbuff);
+ }
return 0;
}
static int macb_alloc_rx_buffers(struct macb *bp)
{
+ struct macb_queue *queue = &bp->queues[0];
int size;
size = bp->rx_ring_size * bp->rx_buffer_size;
- bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
- &bp->rx_buffers_dma, GFP_KERNEL);
- if (!bp->rx_buffers)
+ queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
+ &queue->rx_buffers_dma, GFP_KERNEL);
+ if (!queue->rx_buffers)
return -ENOMEM;
netdev_dbg(bp->dev,
"Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
- size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers);
+ size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers);
return 0;
}
@@ -1819,17 +1845,16 @@ static int macb_alloc_consistent(struct macb *bp)
queue->tx_skb = kmalloc(size, GFP_KERNEL);
if (!queue->tx_skb)
goto out_err;
- }
-
- size = RX_RING_BYTES(bp);
- bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
- &bp->rx_ring_dma, GFP_KERNEL);
- if (!bp->rx_ring)
- goto out_err;
- netdev_dbg(bp->dev,
- "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
- size, (unsigned long)bp->rx_ring_dma, bp->rx_ring);
+ size = RX_RING_BYTES(bp);
+ queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
+ &queue->rx_ring_dma, GFP_KERNEL);
+ if (!queue->rx_ring)
+ goto out_err;
+ netdev_dbg(bp->dev,
+ "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
+ size, (unsigned long)queue->rx_ring_dma, queue->rx_ring);
+ }
if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
goto out_err;
@@ -1856,12 +1881,13 @@ static void gem_init_rings(struct macb *bp)
desc->ctrl |= MACB_BIT(TX_WRAP);
queue->tx_head = 0;
queue->tx_tail = 0;
- }
- bp->rx_tail = 0;
- bp->rx_prepared_head = 0;
+ queue->rx_tail = 0;
+ queue->rx_prepared_head = 0;
+
+ gem_rx_refill(queue);
+ }
- gem_rx_refill(bp);
}
static void macb_init_rings(struct macb *bp)
@@ -1869,7 +1895,7 @@ static void macb_init_rings(struct macb *bp)
int i;
struct macb_dma_desc *desc = NULL;
- macb_init_rx_ring(bp);
+ macb_init_rx_ring(&bp->queues[0]);
for (i = 0; i < bp->tx_ring_size; i++) {
desc = macb_tx_desc(&bp->queues[0], i);
@@ -1978,11 +2004,20 @@ static u32 macb_dbw(struct macb *bp)
*/
static void macb_configure_dma(struct macb *bp)
{
+ struct macb_queue *queue;
+ u32 buffer_size;
+ unsigned int q;
u32 dmacfg;
+ buffer_size = bp->rx_buffer_size / RX_BUFFER_MULTIPLE;
if (macb_is_gem(bp)) {
dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L);
- dmacfg |= GEM_BF(RXBS, bp->rx_buffer_size / RX_BUFFER_MULTIPLE);
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ if (q)
+ queue_writel(queue, RBQS, buffer_size);
+ else
+ dmacfg |= GEM_BF(RXBS, buffer_size);
+ }
if (bp->dma_burst_length)
dmacfg = GEM_BFINS(FBLDO, bp->dma_burst_length, dmacfg);
dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L);
@@ -2051,12 +2086,12 @@ static void macb_init_hw(struct macb *bp)
macb_configure_dma(bp);
/* Initialize TX and RX buffers */
- macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma));
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap & HW_DMA_CAP_64B)
- macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma));
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
+ queue_writel(queue, RBQPH, upper_32_bits(queue->rx_ring_dma));
#endif
- for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
if (bp->hw_dma_cap & HW_DMA_CAP_64B)
@@ -2197,6 +2232,8 @@ static int macb_open(struct net_device *dev)
{
struct macb *bp = netdev_priv(dev);
size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
+ struct macb_queue *queue;
+ unsigned int q;
int err;
netdev_dbg(bp->dev, "open\n");
@@ -2218,11 +2255,12 @@ static int macb_open(struct net_device *dev)
return err;
}
- napi_enable(&bp->napi);
-
bp->macbgem_ops.mog_init_rings(bp);
macb_init_hw(bp);
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ napi_enable(&queue->napi);
+
/* schedule a link state check */
phy_start(dev->phydev);
@@ -2237,10 +2275,14 @@ static int macb_open(struct net_device *dev)
static int macb_close(struct net_device *dev)
{
struct macb *bp = netdev_priv(dev);
+ struct macb_queue *queue;
unsigned long flags;
+ unsigned int q;
netif_tx_stop_all_queues(dev);
- napi_disable(&bp->napi);
+
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ napi_disable(&queue->napi);
if (dev->phydev)
phy_stop(dev->phydev);
@@ -2270,7 +2312,10 @@ static int macb_change_mtu(struct net_device *dev, int new_mtu)
static void gem_update_stats(struct macb *bp)
{
- unsigned int i;
+ struct macb_queue *queue;
+ unsigned int i, q, idx;
+ unsigned long *stat;
+
u32 *p = &bp->hw_stats.gem.tx_octets_31_0;
for (i = 0; i < GEM_STATS_LEN; ++i, ++p) {
@@ -2287,6 +2332,11 @@ static void gem_update_stats(struct macb *bp)
*(++p) += val;
}
}
+
+ idx = GEM_STATS_LEN;
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+ for (i = 0, stat = &queue->stats.first; i < QUEUE_STATS_LEN; ++i, ++stat)
+ bp->ethtool_stats[idx++] = *stat;
}
static struct net_device_stats *gem_get_stats(struct macb *bp)
@@ -2334,14 +2384,17 @@ static void gem_get_ethtool_stats(struct net_device *dev,
bp = netdev_priv(dev);
gem_update_stats(bp);
- memcpy(data, &bp->ethtool_stats, sizeof(u64) * GEM_STATS_LEN);
+ memcpy(data, &bp->ethtool_stats, sizeof(u64)
+ * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES));
}
static int gem_get_sset_count(struct net_device *dev, int sset)
{
+ struct macb *bp = netdev_priv(dev);
+
switch (sset) {
case ETH_SS_STATS:
- return GEM_STATS_LEN;
+ return GEM_STATS_LEN + bp->num_queues * QUEUE_STATS_LEN;
default:
return -EOPNOTSUPP;
}
@@ -2349,13 +2402,25 @@ static int gem_get_sset_count(struct net_device *dev, int sset)
static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
{
+ char stat_string[ETH_GSTRING_LEN];
+ struct macb *bp = netdev_priv(dev);
+ struct macb_queue *queue;
unsigned int i;
+ unsigned int q;
switch (sset) {
case ETH_SS_STATS:
for (i = 0; i < GEM_STATS_LEN; i++, p += ETH_GSTRING_LEN)
memcpy(p, gem_statistics[i].stat_string,
ETH_GSTRING_LEN);
+
+ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+ for (i = 0; i < QUEUE_STATS_LEN; i++, p += ETH_GSTRING_LEN) {
+ snprintf(stat_string, ETH_GSTRING_LEN, "q%d_%s",
+ q, queue_statistics[i].stat_string);
+ memcpy(p, stat_string, ETH_GSTRING_LEN);
+ }
+ }
break;
}
}
@@ -2603,6 +2668,307 @@ static int macb_get_ts_info(struct net_device *netdev,
return ethtool_op_get_ts_info(netdev, info);
}
+static void gem_enable_flow_filters(struct macb *bp, bool enable)
+{
+ struct ethtool_rx_fs_item *item;
+ u32 t2_scr;
+ int num_t2_scr;
+
+ num_t2_scr = GEM_BFEXT(T2SCR, gem_readl(bp, DCFG8));
+
+ list_for_each_entry(item, &bp->rx_fs_list.list, list) {
+ struct ethtool_rx_flow_spec *fs = &item->fs;
+ struct ethtool_tcpip4_spec *tp4sp_m;
+
+ if (fs->location >= num_t2_scr)
+ continue;
+
+ t2_scr = gem_readl_n(bp, SCRT2, fs->location);
+
+ /* enable/disable screener regs for the flow entry */
+ t2_scr = GEM_BFINS(ETHTEN, enable, t2_scr);
+
+ /* only enable fields with no masking */
+ tp4sp_m = &(fs->m_u.tcp_ip4_spec);
+
+ if (enable && (tp4sp_m->ip4src == 0xFFFFFFFF))
+ t2_scr = GEM_BFINS(CMPAEN, 1, t2_scr);
+ else
+ t2_scr = GEM_BFINS(CMPAEN, 0, t2_scr);
+
+ if (enable && (tp4sp_m->ip4dst == 0xFFFFFFFF))
+ t2_scr = GEM_BFINS(CMPBEN, 1, t2_scr);
+ else
+ t2_scr = GEM_BFINS(CMPBEN, 0, t2_scr);
+
+ if (enable && ((tp4sp_m->psrc == 0xFFFF) || (tp4sp_m->pdst == 0xFFFF)))
+ t2_scr = GEM_BFINS(CMPCEN, 1, t2_scr);
+ else
+ t2_scr = GEM_BFINS(CMPCEN, 0, t2_scr);
+
+ gem_writel_n(bp, SCRT2, fs->location, t2_scr);
+ }
+}
+
+static void gem_prog_cmp_regs(struct macb *bp, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *tp4sp_v, *tp4sp_m;
+ uint16_t index = fs->location;
+ u32 w0, w1, t2_scr;
+ bool cmp_a = false;
+ bool cmp_b = false;
+ bool cmp_c = false;
+
+ tp4sp_v = &(fs->h_u.tcp_ip4_spec);
+ tp4sp_m = &(fs->m_u.tcp_ip4_spec);
+
+ /* ignore field if any masking set */
+ if (tp4sp_m->ip4src == 0xFFFFFFFF) {
+ /* 1st compare reg - IP source address */
+ w0 = 0;
+ w1 = 0;
+ w0 = tp4sp_v->ip4src;
+ w1 = GEM_BFINS(T2DISMSK, 1, w1); /* 32-bit compare */
+ w1 = GEM_BFINS(T2CMPOFST, GEM_T2COMPOFST_ETYPE, w1);
+ w1 = GEM_BFINS(T2OFST, ETYPE_SRCIP_OFFSET, w1);
+ gem_writel_n(bp, T2CMPW0, T2CMP_OFST(GEM_IP4SRC_CMP(index)), w0);
+ gem_writel_n(bp, T2CMPW1, T2CMP_OFST(GEM_IP4SRC_CMP(index)), w1);
+ cmp_a = true;
+ }
+
+ /* ignore field if any masking set */
+ if (tp4sp_m->ip4dst == 0xFFFFFFFF) {
+ /* 2nd compare reg - IP destination address */
+ w0 = 0;
+ w1 = 0;
+ w0 = tp4sp_v->ip4dst;
+ w1 = GEM_BFINS(T2DISMSK, 1, w1); /* 32-bit compare */
+ w1 = GEM_BFINS(T2CMPOFST, GEM_T2COMPOFST_ETYPE, w1);
+ w1 = GEM_BFINS(T2OFST, ETYPE_DSTIP_OFFSET, w1);
+ gem_writel_n(bp, T2CMPW0, T2CMP_OFST(GEM_IP4DST_CMP(index)), w0);
+ gem_writel_n(bp, T2CMPW1, T2CMP_OFST(GEM_IP4DST_CMP(index)), w1);
+ cmp_b = true;
+ }
+
+ /* ignore both port fields if masking set in both */
+ if ((tp4sp_m->psrc == 0xFFFF) || (tp4sp_m->pdst == 0xFFFF)) {
+ /* 3rd compare reg - source port, destination port */
+ w0 = 0;
+ w1 = 0;
+ w1 = GEM_BFINS(T2CMPOFST, GEM_T2COMPOFST_IPHDR, w1);
+ if (tp4sp_m->psrc == tp4sp_m->pdst) {
+ w0 = GEM_BFINS(T2MASK, tp4sp_v->psrc, w0);
+ w0 = GEM_BFINS(T2CMP, tp4sp_v->pdst, w0);
+ w1 = GEM_BFINS(T2DISMSK, 1, w1); /* 32-bit compare */
+ w1 = GEM_BFINS(T2OFST, IPHDR_SRCPORT_OFFSET, w1);
+ } else {
+ /* only one port definition */
+ w1 = GEM_BFINS(T2DISMSK, 0, w1); /* 16-bit compare */
+ w0 = GEM_BFINS(T2MASK, 0xFFFF, w0);
+ if (tp4sp_m->psrc == 0xFFFF) { /* src port */
+ w0 = GEM_BFINS(T2CMP, tp4sp_v->psrc, w0);
+ w1 = GEM_BFINS(T2OFST, IPHDR_SRCPORT_OFFSET, w1);
+ } else { /* dst port */
+ w0 = GEM_BFINS(T2CMP, tp4sp_v->pdst, w0);
+ w1 = GEM_BFINS(T2OFST, IPHDR_DSTPORT_OFFSET, w1);
+ }
+ }
+ gem_writel_n(bp, T2CMPW0, T2CMP_OFST(GEM_PORT_CMP(index)), w0);
+ gem_writel_n(bp, T2CMPW1, T2CMP_OFST(GEM_PORT_CMP(index)), w1);
+ cmp_c = true;
+ }
+
+ t2_scr = 0;
+ t2_scr = GEM_BFINS(QUEUE, (fs->ring_cookie) & 0xFF, t2_scr);
+ t2_scr = GEM_BFINS(ETHT2IDX, SCRT2_ETHT, t2_scr);
+ if (cmp_a)
+ t2_scr = GEM_BFINS(CMPA, GEM_IP4SRC_CMP(index), t2_scr);
+ if (cmp_b)
+ t2_scr = GEM_BFINS(CMPB, GEM_IP4DST_CMP(index), t2_scr);
+ if (cmp_c)
+ t2_scr = GEM_BFINS(CMPC, GEM_PORT_CMP(index), t2_scr);
+ gem_writel_n(bp, SCRT2, index, t2_scr);
+}
+
+static int gem_add_flow_filter(struct net_device *netdev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct macb *bp = netdev_priv(netdev);
+ struct ethtool_rx_flow_spec *fs = &cmd->fs;
+ struct ethtool_rx_fs_item *item, *newfs;
+ unsigned long flags;
+ int ret = -EINVAL;
+ bool added = false;
+
+ newfs = kmalloc(sizeof(*newfs), GFP_KERNEL);
+ if (newfs == NULL)
+ return -ENOMEM;
+ memcpy(&newfs->fs, fs, sizeof(newfs->fs));
+
+ netdev_dbg(netdev,
+ "Adding flow filter entry,type=%u,queue=%u,loc=%u,src=%08X,dst=%08X,ps=%u,pd=%u\n",
+ fs->flow_type, (int)fs->ring_cookie, fs->location,
+ htonl(fs->h_u.tcp_ip4_spec.ip4src),
+ htonl(fs->h_u.tcp_ip4_spec.ip4dst),
+ htons(fs->h_u.tcp_ip4_spec.psrc), htons(fs->h_u.tcp_ip4_spec.pdst));
+
+ spin_lock_irqsave(&bp->rx_fs_lock, flags);
+
+ /* find correct place to add in list */
+ list_for_each_entry(item, &bp->rx_fs_list.list, list) {
+ if (item->fs.location > newfs->fs.location) {
+ list_add_tail(&newfs->list, &item->list);
+ added = true;
+ break;
+ } else if (item->fs.location == fs->location) {
+ netdev_err(netdev, "Rule not added: location %d not free!\n",
+ fs->location);
+ ret = -EBUSY;
+ goto err;
+ }
+ }
+ if (!added)
+ list_add_tail(&newfs->list, &bp->rx_fs_list.list);
+
+ gem_prog_cmp_regs(bp, fs);
+ bp->rx_fs_list.count++;
+ /* enable filtering if NTUPLE on */
+ if (netdev->features & NETIF_F_NTUPLE)
+ gem_enable_flow_filters(bp, 1);
+
+ spin_unlock_irqrestore(&bp->rx_fs_lock, flags);
+ return 0;
+
+err:
+ spin_unlock_irqrestore(&bp->rx_fs_lock, flags);
+ kfree(newfs);
+ return ret;
+}
+
+static int gem_del_flow_filter(struct net_device *netdev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct macb *bp = netdev_priv(netdev);
+ struct ethtool_rx_fs_item *item;
+ struct ethtool_rx_flow_spec *fs;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bp->rx_fs_lock, flags);
+
+ list_for_each_entry(item, &bp->rx_fs_list.list, list) {
+ if (item->fs.location == cmd->fs.location) {
+ /* disable screener regs for the flow entry */
+ fs = &(item->fs);
+ netdev_dbg(netdev,
+ "Deleting flow filter entry,type=%u,queue=%u,loc=%u,src=%08X,dst=%08X,ps=%u,pd=%u\n",
+ fs->flow_type, (int)fs->ring_cookie, fs->location,
+ htonl(fs->h_u.tcp_ip4_spec.ip4src),
+ htonl(fs->h_u.tcp_ip4_spec.ip4dst),
+ htons(fs->h_u.tcp_ip4_spec.psrc),
+ htons(fs->h_u.tcp_ip4_spec.pdst));
+
+ gem_writel_n(bp, SCRT2, fs->location, 0);
+
+ list_del(&item->list);
+ bp->rx_fs_list.count--;
+ spin_unlock_irqrestore(&bp->rx_fs_lock, flags);
+ kfree(item);
+ return 0;
+ }
+ }
+
+ spin_unlock_irqrestore(&bp->rx_fs_lock, flags);
+ return -EINVAL;
+}
+
+static int gem_get_flow_entry(struct net_device *netdev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct macb *bp = netdev_priv(netdev);
+ struct ethtool_rx_fs_item *item;
+
+ list_for_each_entry(item, &bp->rx_fs_list.list, list) {
+ if (item->fs.location == cmd->fs.location) {
+ memcpy(&cmd->fs, &item->fs, sizeof(cmd->fs));
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+static int gem_get_all_flow_entries(struct net_device *netdev,
+ struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+ struct macb *bp = netdev_priv(netdev);
+ struct ethtool_rx_fs_item *item;
+ uint32_t cnt = 0;
+
+ list_for_each_entry(item, &bp->rx_fs_list.list, list) {
+ if (cnt == cmd->rule_cnt)
+ return -EMSGSIZE;
+ rule_locs[cnt] = item->fs.location;
+ cnt++;
+ }
+ cmd->data = bp->max_tuples;
+ cmd->rule_cnt = cnt;
+
+ return 0;
+}
+
+static int gem_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct macb *bp = netdev_priv(netdev);
+ int ret = 0;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = bp->num_queues;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = bp->rx_fs_list.count;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ ret = gem_get_flow_entry(netdev, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ ret = gem_get_all_flow_entries(netdev, cmd, rule_locs);
+ break;
+ default:
+ netdev_err(netdev,
+ "Command parameter %d is not supported\n", cmd->cmd);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+ struct macb *bp = netdev_priv(netdev);
+ int ret;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ if ((cmd->fs.location >= bp->max_tuples)
+ || (cmd->fs.ring_cookie >= bp->num_queues)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = gem_add_flow_filter(netdev, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ ret = gem_del_flow_filter(netdev, cmd);
+ break;
+ default:
+ netdev_err(netdev,
+ "Command parameter %d is not supported\n", cmd->cmd);
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
static const struct ethtool_ops macb_ethtool_ops = {
.get_regs_len = macb_get_regs_len,
.get_regs = macb_get_regs,
@@ -2628,6 +2994,8 @@ static const struct ethtool_ops gem_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_ringparam = macb_get_ringparam,
.set_ringparam = macb_set_ringparam,
+ .get_rxnfc = gem_get_rxnfc,
+ .set_rxnfc = gem_set_rxnfc,
};
static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -2685,6 +3053,12 @@ static int macb_set_features(struct net_device *netdev,
gem_writel(bp, NCFGR, netcfg);
}
+ /* RX Flow Filters */
+ if ((changed & NETIF_F_NTUPLE) && macb_is_gem(bp)) {
+ bool turn_on = features & NETIF_F_NTUPLE;
+
+ gem_enable_flow_filters(bp, turn_on);
+ }
return 0;
}
@@ -2850,7 +3224,7 @@ static int macb_init(struct platform_device *pdev)
struct macb *bp = netdev_priv(dev);
struct macb_queue *queue;
int err;
- u32 val;
+ u32 val, reg;
bp->tx_ring_size = DEFAULT_TX_RING_SIZE;
bp->rx_ring_size = DEFAULT_RX_RING_SIZE;
@@ -2865,15 +3239,20 @@ static int macb_init(struct platform_device *pdev)
queue = &bp->queues[q];
queue->bp = bp;
+ netif_napi_add(dev, &queue->napi, macb_poll, 64);
if (hw_q) {
queue->ISR = GEM_ISR(hw_q - 1);
queue->IER = GEM_IER(hw_q - 1);
queue->IDR = GEM_IDR(hw_q - 1);
queue->IMR = GEM_IMR(hw_q - 1);
queue->TBQP = GEM_TBQP(hw_q - 1);
+ queue->RBQP = GEM_RBQP(hw_q - 1);
+ queue->RBQS = GEM_RBQS(hw_q - 1);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap & HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
queue->TBQPH = GEM_TBQPH(hw_q - 1);
+ queue->RBQPH = GEM_RBQPH(hw_q - 1);
+ }
#endif
} else {
/* queue0 uses legacy registers */
@@ -2882,9 +3261,12 @@ static int macb_init(struct platform_device *pdev)
queue->IDR = MACB_IDR;
queue->IMR = MACB_IMR;
queue->TBQP = MACB_TBQP;
+ queue->RBQP = MACB_RBQP;
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap & HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
queue->TBQPH = MACB_TBQPH;
+ queue->RBQPH = MACB_RBQPH;
+ }
#endif
}
@@ -2908,7 +3290,6 @@ static int macb_init(struct platform_device *pdev)
}
dev->netdev_ops = &macb_netdev_ops;
- netif_napi_add(dev, &bp->napi, macb_poll, 64);
/* setup appropriated routines according to adapter type */
if (macb_is_gem(bp)) {
@@ -2941,6 +3322,30 @@ static int macb_init(struct platform_device *pdev)
dev->hw_features &= ~NETIF_F_SG;
dev->features = dev->hw_features;
+ /* Check RX Flow Filters support.
+ * Max Rx flows set by availability of screeners & compare regs:
+ * each 4-tuple define requires 1 T2 screener reg + 3 compare regs
+ */
+ reg = gem_readl(bp, DCFG8);
+ bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3),
+ GEM_BFEXT(T2SCR, reg));
+ if (bp->max_tuples > 0) {
+ /* also needs one ethtype match to check IPv4 */
+ if (GEM_BFEXT(SCR2ETH, reg) > 0) {
+ /* program this reg now */
+ reg = 0;
+ reg = GEM_BFINS(ETHTCMP, (uint16_t)ETH_P_IP, reg);
+ gem_writel_n(bp, ETHT, SCRT2_ETHT, reg);
+ /* Filtering is supported in hw but don't enable it in kernel now */
+ dev->hw_features |= NETIF_F_NTUPLE;
+ /* init Rx flow definitions */
+ INIT_LIST_HEAD(&bp->rx_fs_list.list);
+ bp->rx_fs_list.count = 0;
+ spin_lock_init(&bp->rx_fs_lock);
+ } else
+ bp->max_tuples = 0;
+ }
+
if (!(bp->caps & MACB_CAPS_USRIO_DISABLED)) {
val = 0;
if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII)
@@ -2977,34 +3382,35 @@ static int macb_init(struct platform_device *pdev)
static int at91ether_start(struct net_device *dev)
{
struct macb *lp = netdev_priv(dev);
+ struct macb_queue *q = &lp->queues[0];
struct macb_dma_desc *desc;
dma_addr_t addr;
u32 ctl;
int i;
- lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev,
+ q->rx_ring = dma_alloc_coherent(&lp->pdev->dev,
(AT91ETHER_MAX_RX_DESCR *
macb_dma_desc_get_size(lp)),
- &lp->rx_ring_dma, GFP_KERNEL);
- if (!lp->rx_ring)
+ &q->rx_ring_dma, GFP_KERNEL);
+ if (!q->rx_ring)
return -ENOMEM;
- lp->rx_buffers = dma_alloc_coherent(&lp->pdev->dev,
+ q->rx_buffers = dma_alloc_coherent(&lp->pdev->dev,
AT91ETHER_MAX_RX_DESCR *
AT91ETHER_MAX_RBUFF_SZ,
- &lp->rx_buffers_dma, GFP_KERNEL);
- if (!lp->rx_buffers) {
+ &q->rx_buffers_dma, GFP_KERNEL);
+ if (!q->rx_buffers) {
dma_free_coherent(&lp->pdev->dev,
AT91ETHER_MAX_RX_DESCR *
macb_dma_desc_get_size(lp),
- lp->rx_ring, lp->rx_ring_dma);
- lp->rx_ring = NULL;
+ q->rx_ring, q->rx_ring_dma);
+ q->rx_ring = NULL;
return -ENOMEM;
}
- addr = lp->rx_buffers_dma;
+ addr = q->rx_buffers_dma;
for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
- desc = macb_rx_desc(lp, i);
+ desc = macb_rx_desc(q, i);
macb_set_addr(lp, desc, addr);
desc->ctrl = 0;
addr += AT91ETHER_MAX_RBUFF_SZ;
@@ -3014,10 +3420,10 @@ static int at91ether_start(struct net_device *dev)
desc->addr |= MACB_BIT(RX_WRAP);
/* Reset buffer index */
- lp->rx_tail = 0;
+ q->rx_tail = 0;
/* Program address of descriptor list in Rx Buffer Queue register */
- macb_writel(lp, RBQP, lp->rx_ring_dma);
+ macb_writel(lp, RBQP, q->rx_ring_dma);
/* Enable Receive and Transmit */
ctl = macb_readl(lp, NCR);
@@ -3064,6 +3470,7 @@ static int at91ether_open(struct net_device *dev)
static int at91ether_close(struct net_device *dev)
{
struct macb *lp = netdev_priv(dev);
+ struct macb_queue *q = &lp->queues[0];
u32 ctl;
/* Disable Receiver and Transmitter */
@@ -3084,13 +3491,13 @@ static int at91ether_close(struct net_device *dev)
dma_free_coherent(&lp->pdev->dev,
AT91ETHER_MAX_RX_DESCR *
macb_dma_desc_get_size(lp),
- lp->rx_ring, lp->rx_ring_dma);
- lp->rx_ring = NULL;
+ q->rx_ring, q->rx_ring_dma);
+ q->rx_ring = NULL;
dma_free_coherent(&lp->pdev->dev,
AT91ETHER_MAX_RX_DESCR * AT91ETHER_MAX_RBUFF_SZ,
- lp->rx_buffers, lp->rx_buffers_dma);
- lp->rx_buffers = NULL;
+ q->rx_buffers, q->rx_buffers_dma);
+ q->rx_buffers = NULL;
return 0;
}
@@ -3134,14 +3541,15 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
static void at91ether_rx(struct net_device *dev)
{
struct macb *lp = netdev_priv(dev);
+ struct macb_queue *q = &lp->queues[0];
struct macb_dma_desc *desc;
unsigned char *p_recv;
struct sk_buff *skb;
unsigned int pktlen;
- desc = macb_rx_desc(lp, lp->rx_tail);
+ desc = macb_rx_desc(q, q->rx_tail);
while (desc->addr & MACB_BIT(RX_USED)) {
- p_recv = lp->rx_buffers + lp->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
+ p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
skb = netdev_alloc_skb(dev, pktlen + 2);
if (skb) {
@@ -3163,12 +3571,12 @@ static void at91ether_rx(struct net_device *dev)
desc->addr &= ~MACB_BIT(RX_USED);
/* wrap after last buffer */
- if (lp->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
- lp->rx_tail = 0;
+ if (q->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
+ q->rx_tail = 0;
else
- lp->rx_tail++;
+ q->rx_tail++;
- desc = macb_rx_desc(lp, lp->rx_tail);
+ desc = macb_rx_desc(q, q->rx_tail);
}
}
@@ -3394,7 +3802,6 @@ static int macb_probe(struct platform_device *pdev)
= macb_config->clk_init;
int (*init)(struct platform_device *) = macb_config->init;
struct device_node *np = pdev->dev.of_node;
- struct device_node *phy_node;
struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL;
unsigned int queue_mask, num_queues;
struct macb_platform_data *pdata;
@@ -3500,18 +3907,6 @@ static int macb_probe(struct platform_device *pdev)
else
macb_get_hwaddr(bp);
- /* Power up the PHY if there is a GPIO reset */
- phy_node = of_get_next_available_child(np, NULL);
- if (phy_node) {
- int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0);
-
- if (gpio_is_valid(gpio)) {
- bp->reset_gpio = gpio_to_desc(gpio);
- gpiod_direction_output(bp->reset_gpio, 1);
- }
- }
- of_node_put(phy_node);
-
err = of_get_phy_mode(np);
if (err < 0) {
pdata = dev_get_platdata(&pdev->dev);
@@ -3558,10 +3953,6 @@ err_out_unregister_mdio:
of_phy_deregister_fixed_link(np);
mdiobus_free(bp->mii_bus);
- /* Shutdown the PHY if there is a GPIO reset */
- if (bp->reset_gpio)
- gpiod_set_value(bp->reset_gpio, 0);
-
err_out_free_netdev:
free_netdev(dev);
@@ -3592,10 +3983,6 @@ static int macb_remove(struct platform_device *pdev)
dev->phydev = NULL;
mdiobus_free(bp->mii_bus);
- /* Shutdown the PHY if there is a GPIO reset */
- if (bp->reset_gpio)
- gpiod_set_value(bp->reset_gpio, 0);
-
unregister_netdev(dev);
clk_disable_unprepare(bp->tx_clk);
clk_disable_unprepare(bp->hclk);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a063c36c4c58..52b3a6044f85 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -65,6 +65,11 @@ module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
+struct nicvf_xdp_tx {
+ u64 dma_addr;
+ u8 qidx;
+};
+
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
@@ -500,14 +505,29 @@ static int nicvf_init_resources(struct nicvf *nic)
return 0;
}
+static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
+{
+ /* Check if it's a recycled page, if not unmap the DMA mapping.
+ * Recycled page holds an extra reference.
+ */
+ if (page_ref_count(page) == 1) {
+ dma_addr &= PAGE_MASK;
+ dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
+ RCV_FRAG_LEN + XDP_HEADROOM,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+}
+
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
struct sk_buff **skb)
{
struct xdp_buff xdp;
struct page *page;
+ struct nicvf_xdp_tx *xdp_tx = NULL;
u32 action;
- u16 len, offset = 0;
+ u16 len, err, offset = 0;
u64 dma_addr, cpu_addr;
void *orig_data;
@@ -521,7 +541,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
cpu_addr = (u64)phys_to_virt(cpu_addr);
page = virt_to_page((void *)cpu_addr);
- xdp.data_hard_start = page_address(page);
+ xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
xdp.data = (void *)cpu_addr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
@@ -540,18 +560,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
switch (action) {
case XDP_PASS:
- /* Check if it's a recycled page, if not
- * unmap the DMA mapping.
- *
- * Recycled page holds an extra reference.
- */
- if (page_ref_count(page) == 1) {
- dma_addr &= PAGE_MASK;
- dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
- RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
- DMA_FROM_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
- }
+ nicvf_unmap_page(nic, page, dma_addr);
/* Build SKB and pass on packet to network stack */
*skb = build_skb(xdp.data,
@@ -564,6 +573,20 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
case XDP_TX:
nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
return true;
+ case XDP_REDIRECT:
+ /* Save DMA address for use while transmitting */
+ xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
+ xdp_tx->dma_addr = dma_addr;
+ xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
+
+ err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
+ if (!err)
+ return true;
+
+ /* Free the page on error */
+ nicvf_unmap_page(nic, page, dma_addr);
+ put_page(page);
+ break;
default:
bpf_warn_invalid_xdp_action(action);
/* fall through */
@@ -571,18 +594,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
trace_xdp_exception(nic->netdev, prog, action);
/* fall through */
case XDP_DROP:
- /* Check if it's a recycled page, if not
- * unmap the DMA mapping.
- *
- * Recycled page holds an extra reference.
- */
- if (page_ref_count(page) == 1) {
- dma_addr &= PAGE_MASK;
- dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
- RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
- DMA_FROM_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
- }
+ nicvf_unmap_page(nic, page, dma_addr);
put_page(page);
return true;
}
@@ -1764,6 +1776,50 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
}
}
+static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct nicvf *snic = nic;
+ struct nicvf_xdp_tx *xdp_tx;
+ struct snd_queue *sq;
+ struct page *page;
+ int err, qidx;
+
+ if (!netif_running(netdev) || !nic->xdp_prog)
+ return -EINVAL;
+
+ page = virt_to_page(xdp->data);
+ xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
+ qidx = xdp_tx->qidx;
+
+ if (xdp_tx->qidx >= nic->xdp_tx_queues)
+ return -EINVAL;
+
+ /* Get secondary Qset's info */
+ if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) {
+ qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS;
+ snic = (struct nicvf *)nic->snicvf[qidx - 1];
+ if (!snic)
+ return -EINVAL;
+ qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS;
+ }
+
+ sq = &snic->qs->sq[qidx];
+ err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data,
+ xdp_tx->dma_addr,
+ xdp->data_end - xdp->data);
+ if (err)
+ return -ENOMEM;
+
+ nicvf_xdp_sq_doorbell(snic, sq, qidx);
+ return 0;
+}
+
+static void nicvf_xdp_flush(struct net_device *dev)
+{
+ return;
+}
+
static const struct net_device_ops nicvf_netdev_ops = {
.ndo_open = nicvf_open,
.ndo_stop = nicvf_stop,
@@ -1775,6 +1831,8 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_fix_features = nicvf_fix_features,
.ndo_set_features = nicvf_set_features,
.ndo_bpf = nicvf_xdp,
+ .ndo_xdp_xmit = nicvf_xdp_xmit,
+ .ndo_xdp_flush = nicvf_xdp_flush,
};
static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -1833,6 +1891,11 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
nic->pdev = pdev;
nic->pnicvf = nic;
nic->max_queues = qcount;
+ /* If no of CPUs are too low, there won't be any queues left
+ * for XDP_TX, hence double it.
+ */
+ if (!nic->t88)
+ nic->max_queues *= 2;
/* MAP VF's configuration registers */
nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index a3d12dbde95b..f38ea349aa00 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -204,7 +204,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
/* Reserve space for header modifications by BPF program */
if (rbdr->is_xdp)
- buf_len += XDP_PACKET_HEADROOM;
+ buf_len += XDP_HEADROOM;
/* Check if it's recycled */
if (pgcache)
@@ -224,8 +224,9 @@ ret:
nic->rb_page = NULL;
return -ENOMEM;
}
+
if (pgcache)
- pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
+ pgcache->dma_addr = *rbuf + XDP_HEADROOM;
nic->rb_page_offset += buf_len;
}
@@ -1236,7 +1237,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
int qentry;
if (subdesc_cnt > sq->xdp_free_cnt)
- return 0;
+ return -1;
qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
@@ -1247,7 +1248,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
sq->xdp_desc_cnt += subdesc_cnt;
- return 1;
+ return 0;
}
/* Calculate no of SQ subdescriptors needed to transmit all
@@ -1625,7 +1626,7 @@ static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
if (page_ref_count(page) != 1)
return;
- len += XDP_PACKET_HEADROOM;
+ len += XDP_HEADROOM;
/* Receive buffers in XDP mode are mapped from page start */
dma_addr &= PAGE_MASK;
}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 67d1a3230773..178ab6e8e3c5 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/iommu.h>
+#include <linux/bpf.h>
#include "q_struct.h"
#define MAX_QUEUE_SET 128
@@ -92,6 +93,9 @@
#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define RCV_BUF_HEADROOM 128 /* To store dma address for XDP redirect */
+#define XDP_HEADROOM (XDP_PACKET_HEADROOM + RCV_BUF_HEADROOM)
+
#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
MAX_CQE_PER_PKT_XMIT)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index 605689957496..2e71e334d819 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -18,17 +18,15 @@
#ifndef __CUDBG_ENTITY_H__
#define __CUDBG_ENTITY_H__
-#define EDC0_FLAG 3
-#define EDC1_FLAG 4
+#define EDC0_FLAG 0
+#define EDC1_FLAG 1
+#define MC_FLAG 2
+#define MC0_FLAG 3
+#define MC1_FLAG 4
+#define HMA_FLAG 5
#define CUDBG_ENTITY_SIGNATURE 0xCCEDB001
-struct card_mem {
- u16 size_edc0;
- u16 size_edc1;
- u16 mem_flag;
-};
-
struct cudbg_mbox_log {
struct mbox_cmd entry;
u32 hi[MBOX_LEN / 8];
@@ -87,6 +85,48 @@ struct cudbg_tp_la {
u8 data[0];
};
+static const char * const cudbg_region[] = {
+ "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
+ "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
+ "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
+ "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
+ "RQUDP region:", "PBL region:", "TXPBL region:",
+ "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
+ "On-chip queues:"
+};
+
+/* Memory region info relative to current memory (i.e. wrt 0). */
+struct cudbg_region_info {
+ bool exist; /* Does region exists in current memory? */
+ u32 start; /* Start wrt 0 */
+ u32 end; /* End wrt 0 */
+};
+
+struct cudbg_mem_desc {
+ u32 base;
+ u32 limit;
+ u32 idx;
+};
+
+struct cudbg_meminfo {
+ struct cudbg_mem_desc avail[4];
+ struct cudbg_mem_desc mem[ARRAY_SIZE(cudbg_region) + 3];
+ u32 avail_c;
+ u32 mem_c;
+ u32 up_ram_lo;
+ u32 up_ram_hi;
+ u32 up_extmem2_lo;
+ u32 up_extmem2_hi;
+ u32 rx_pages_data[3];
+ u32 tx_pages_data[4];
+ u32 p_structs;
+ u32 reserved[12];
+ u32 port_used[4];
+ u32 port_alloc[4];
+ u32 loopback_used[NCHAN];
+ u32 loopback_alloc[NCHAN];
+};
+
struct cudbg_cim_pif_la {
int size;
u8 data[0];
@@ -145,6 +185,7 @@ struct cudbg_tid_info_region_rev1 {
u32 reserved[16];
};
+#define CUDBG_LOWMEM_MAX_CTXT_QIDS 256
#define CUDBG_MAX_FL_QIDS 1024
struct cudbg_ch_cntxt {
@@ -334,6 +375,25 @@ static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = {
{0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */
};
+#define CUDBG_NUM_PCIE_CONFIG_REGS 0x61
+
+static const u32 t5_pcie_config_array[][2] = {
+ {0x0, 0x34},
+ {0x3c, 0x40},
+ {0x50, 0x64},
+ {0x70, 0x80},
+ {0x94, 0xa0},
+ {0xb0, 0xb8},
+ {0xd0, 0xd4},
+ {0x100, 0x128},
+ {0x140, 0x148},
+ {0x150, 0x164},
+ {0x170, 0x178},
+ {0x180, 0x194},
+ {0x1a0, 0x1b8},
+ {0x1c0, 0x208},
+};
+
static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = {
{0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */
{0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
index e10ff1ee62c5..e8173ae32158 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
@@ -47,6 +47,8 @@ enum cudbg_dbg_entity_type {
CUDBG_CIM_OBQ_NCSI = 17,
CUDBG_EDC0 = 18,
CUDBG_EDC1 = 19,
+ CUDBG_MC0 = 20,
+ CUDBG_MC1 = 21,
CUDBG_RSS = 22,
CUDBG_RSS_VF_CONF = 25,
CUDBG_PATH_MTU = 27,
@@ -56,6 +58,7 @@ enum cudbg_dbg_entity_type {
CUDBG_SGE_INDIRECT = 37,
CUDBG_ULPRX_LA = 41,
CUDBG_TP_LA = 43,
+ CUDBG_MEMINFO = 44,
CUDBG_CIM_PIF_LA = 45,
CUDBG_CLK = 46,
CUDBG_CIM_OBQ_RXQ0 = 47,
@@ -63,6 +66,7 @@ enum cudbg_dbg_entity_type {
CUDBG_PCIE_INDIRECT = 50,
CUDBG_PM_INDIRECT = 51,
CUDBG_TID_INFO = 54,
+ CUDBG_PCIE_CONFIG = 55,
CUDBG_DUMP_CONTEXT = 56,
CUDBG_MPS_TCAM = 57,
CUDBG_VPD_DATA = 58,
@@ -74,6 +78,7 @@ enum cudbg_dbg_entity_type {
CUDBG_PBT_TABLE = 65,
CUDBG_MBOX_LOG = 66,
CUDBG_HMA_INDIRECT = 67,
+ CUDBG_HMA = 68,
CUDBG_MAX_ENTITY = 70,
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index d699bf88d18f..d73fb6a85f8e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -15,12 +15,14 @@
*
*/
+#include <linux/sort.h>
+
#include "t4_regs.h"
#include "cxgb4.h"
#include "cudbg_if.h"
#include "cudbg_lib_common.h"
-#include "cudbg_lib.h"
#include "cudbg_entity.h"
+#include "cudbg_lib.h"
static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff,
struct cudbg_buffer *dbg_buff)
@@ -84,6 +86,277 @@ static int cudbg_read_vpd_reg(struct adapter *padap, u32 addr, u32 len,
return 0;
}
+static int cudbg_mem_desc_cmp(const void *a, const void *b)
+{
+ return ((const struct cudbg_mem_desc *)a)->base -
+ ((const struct cudbg_mem_desc *)b)->base;
+}
+
+int cudbg_fill_meminfo(struct adapter *padap,
+ struct cudbg_meminfo *meminfo_buff)
+{
+ struct cudbg_mem_desc *md;
+ u32 lo, hi, used, alloc;
+ int n, i;
+
+ memset(meminfo_buff->avail, 0,
+ ARRAY_SIZE(meminfo_buff->avail) *
+ sizeof(struct cudbg_mem_desc));
+ memset(meminfo_buff->mem, 0,
+ (ARRAY_SIZE(cudbg_region) + 3) * sizeof(struct cudbg_mem_desc));
+ md = meminfo_buff->mem;
+
+ for (i = 0; i < ARRAY_SIZE(meminfo_buff->mem); i++) {
+ meminfo_buff->mem[i].limit = 0;
+ meminfo_buff->mem[i].idx = i;
+ }
+
+ /* Find and sort the populated memory ranges */
+ i = 0;
+ lo = t4_read_reg(padap, MA_TARGET_MEM_ENABLE_A);
+ if (lo & EDRAM0_ENABLE_F) {
+ hi = t4_read_reg(padap, MA_EDRAM0_BAR_A);
+ meminfo_buff->avail[i].base =
+ cudbg_mbytes_to_bytes(EDRAM0_BASE_G(hi));
+ meminfo_buff->avail[i].limit =
+ meminfo_buff->avail[i].base +
+ cudbg_mbytes_to_bytes(EDRAM0_SIZE_G(hi));
+ meminfo_buff->avail[i].idx = 0;
+ i++;
+ }
+
+ if (lo & EDRAM1_ENABLE_F) {
+ hi = t4_read_reg(padap, MA_EDRAM1_BAR_A);
+ meminfo_buff->avail[i].base =
+ cudbg_mbytes_to_bytes(EDRAM1_BASE_G(hi));
+ meminfo_buff->avail[i].limit =
+ meminfo_buff->avail[i].base +
+ cudbg_mbytes_to_bytes(EDRAM1_SIZE_G(hi));
+ meminfo_buff->avail[i].idx = 1;
+ i++;
+ }
+
+ if (is_t5(padap->params.chip)) {
+ if (lo & EXT_MEM0_ENABLE_F) {
+ hi = t4_read_reg(padap, MA_EXT_MEMORY0_BAR_A);
+ meminfo_buff->avail[i].base =
+ cudbg_mbytes_to_bytes(EXT_MEM_BASE_G(hi));
+ meminfo_buff->avail[i].limit =
+ meminfo_buff->avail[i].base +
+ cudbg_mbytes_to_bytes(EXT_MEM_SIZE_G(hi));
+ meminfo_buff->avail[i].idx = 3;
+ i++;
+ }
+
+ if (lo & EXT_MEM1_ENABLE_F) {
+ hi = t4_read_reg(padap, MA_EXT_MEMORY1_BAR_A);
+ meminfo_buff->avail[i].base =
+ cudbg_mbytes_to_bytes(EXT_MEM1_BASE_G(hi));
+ meminfo_buff->avail[i].limit =
+ meminfo_buff->avail[i].base +
+ cudbg_mbytes_to_bytes(EXT_MEM1_SIZE_G(hi));
+ meminfo_buff->avail[i].idx = 4;
+ i++;
+ }
+ } else {
+ if (lo & EXT_MEM_ENABLE_F) {
+ hi = t4_read_reg(padap, MA_EXT_MEMORY_BAR_A);
+ meminfo_buff->avail[i].base =
+ cudbg_mbytes_to_bytes(EXT_MEM_BASE_G(hi));
+ meminfo_buff->avail[i].limit =
+ meminfo_buff->avail[i].base +
+ cudbg_mbytes_to_bytes(EXT_MEM_SIZE_G(hi));
+ meminfo_buff->avail[i].idx = 2;
+ i++;
+ }
+
+ if (lo & HMA_MUX_F) {
+ hi = t4_read_reg(padap, MA_EXT_MEMORY1_BAR_A);
+ meminfo_buff->avail[i].base =
+ cudbg_mbytes_to_bytes(EXT_MEM1_BASE_G(hi));
+ meminfo_buff->avail[i].limit =
+ meminfo_buff->avail[i].base +
+ cudbg_mbytes_to_bytes(EXT_MEM1_SIZE_G(hi));
+ meminfo_buff->avail[i].idx = 5;
+ i++;
+ }
+ }
+
+ if (!i) /* no memory available */
+ return CUDBG_STATUS_ENTITY_NOT_FOUND;
+
+ meminfo_buff->avail_c = i;
+ sort(meminfo_buff->avail, i, sizeof(struct cudbg_mem_desc),
+ cudbg_mem_desc_cmp, NULL);
+ (md++)->base = t4_read_reg(padap, SGE_DBQ_CTXT_BADDR_A);
+ (md++)->base = t4_read_reg(padap, SGE_IMSG_CTXT_BADDR_A);
+ (md++)->base = t4_read_reg(padap, SGE_FLM_CACHE_BADDR_A);
+ (md++)->base = t4_read_reg(padap, TP_CMM_TCB_BASE_A);
+ (md++)->base = t4_read_reg(padap, TP_CMM_MM_BASE_A);
+ (md++)->base = t4_read_reg(padap, TP_CMM_TIMER_BASE_A);
+ (md++)->base = t4_read_reg(padap, TP_CMM_MM_RX_FLST_BASE_A);
+ (md++)->base = t4_read_reg(padap, TP_CMM_MM_TX_FLST_BASE_A);
+ (md++)->base = t4_read_reg(padap, TP_CMM_MM_PS_FLST_BASE_A);
+
+ /* the next few have explicit upper bounds */
+ md->base = t4_read_reg(padap, TP_PMM_TX_BASE_A);
+ md->limit = md->base - 1 +
+ t4_read_reg(padap, TP_PMM_TX_PAGE_SIZE_A) *
+ PMTXMAXPAGE_G(t4_read_reg(padap, TP_PMM_TX_MAX_PAGE_A));
+ md++;
+
+ md->base = t4_read_reg(padap, TP_PMM_RX_BASE_A);
+ md->limit = md->base - 1 +
+ t4_read_reg(padap, TP_PMM_RX_PAGE_SIZE_A) *
+ PMRXMAXPAGE_G(t4_read_reg(padap, TP_PMM_RX_MAX_PAGE_A));
+ md++;
+
+ if (t4_read_reg(padap, LE_DB_CONFIG_A) & HASHEN_F) {
+ if (CHELSIO_CHIP_VERSION(padap->params.chip) <= CHELSIO_T5) {
+ hi = t4_read_reg(padap, LE_DB_TID_HASHBASE_A) / 4;
+ md->base = t4_read_reg(padap, LE_DB_HASH_TID_BASE_A);
+ } else {
+ hi = t4_read_reg(padap, LE_DB_HASH_TID_BASE_A);
+ md->base = t4_read_reg(padap,
+ LE_DB_HASH_TBL_BASE_ADDR_A);
+ }
+ md->limit = 0;
+ } else {
+ md->base = 0;
+ md->idx = ARRAY_SIZE(cudbg_region); /* hide it */
+ }
+ md++;
+
+#define ulp_region(reg) do { \
+ md->base = t4_read_reg(padap, ULP_ ## reg ## _LLIMIT_A);\
+ (md++)->limit = t4_read_reg(padap, ULP_ ## reg ## _ULIMIT_A);\
+} while (0)
+
+ ulp_region(RX_ISCSI);
+ ulp_region(RX_TDDP);
+ ulp_region(TX_TPT);
+ ulp_region(RX_STAG);
+ ulp_region(RX_RQ);
+ ulp_region(RX_RQUDP);
+ ulp_region(RX_PBL);
+ ulp_region(TX_PBL);
+#undef ulp_region
+ md->base = 0;
+ md->idx = ARRAY_SIZE(cudbg_region);
+ if (!is_t4(padap->params.chip)) {
+ u32 fifo_size = t4_read_reg(padap, SGE_DBVFIFO_SIZE_A);
+ u32 sge_ctrl = t4_read_reg(padap, SGE_CONTROL2_A);
+ u32 size = 0;
+
+ if (is_t5(padap->params.chip)) {
+ if (sge_ctrl & VFIFO_ENABLE_F)
+ size = DBVFIFO_SIZE_G(fifo_size);
+ } else {
+ size = T6_DBVFIFO_SIZE_G(fifo_size);
+ }
+
+ if (size) {
+ md->base = BASEADDR_G(t4_read_reg(padap,
+ SGE_DBVFIFO_BADDR_A));
+ md->limit = md->base + (size << 2) - 1;
+ }
+ }
+
+ md++;
+
+ md->base = t4_read_reg(padap, ULP_RX_CTX_BASE_A);
+ md->limit = 0;
+ md++;
+ md->base = t4_read_reg(padap, ULP_TX_ERR_TABLE_BASE_A);
+ md->limit = 0;
+ md++;
+
+ md->base = padap->vres.ocq.start;
+ if (padap->vres.ocq.size)
+ md->limit = md->base + padap->vres.ocq.size - 1;
+ else
+ md->idx = ARRAY_SIZE(cudbg_region); /* hide it */
+ md++;
+
+ /* add any address-space holes, there can be up to 3 */
+ for (n = 0; n < i - 1; n++)
+ if (meminfo_buff->avail[n].limit <
+ meminfo_buff->avail[n + 1].base)
+ (md++)->base = meminfo_buff->avail[n].limit;
+
+ if (meminfo_buff->avail[n].limit)
+ (md++)->base = meminfo_buff->avail[n].limit;
+
+ n = md - meminfo_buff->mem;
+ meminfo_buff->mem_c = n;
+
+ sort(meminfo_buff->mem, n, sizeof(struct cudbg_mem_desc),
+ cudbg_mem_desc_cmp, NULL);
+
+ lo = t4_read_reg(padap, CIM_SDRAM_BASE_ADDR_A);
+ hi = t4_read_reg(padap, CIM_SDRAM_ADDR_SIZE_A) + lo - 1;
+ meminfo_buff->up_ram_lo = lo;
+ meminfo_buff->up_ram_hi = hi;
+
+ lo = t4_read_reg(padap, CIM_EXTMEM2_BASE_ADDR_A);
+ hi = t4_read_reg(padap, CIM_EXTMEM2_ADDR_SIZE_A) + lo - 1;
+ meminfo_buff->up_extmem2_lo = lo;
+ meminfo_buff->up_extmem2_hi = hi;
+
+ lo = t4_read_reg(padap, TP_PMM_RX_MAX_PAGE_A);
+ meminfo_buff->rx_pages_data[0] = PMRXMAXPAGE_G(lo);
+ meminfo_buff->rx_pages_data[1] =
+ t4_read_reg(padap, TP_PMM_RX_PAGE_SIZE_A) >> 10;
+ meminfo_buff->rx_pages_data[2] = (lo & PMRXNUMCHN_F) ? 2 : 1;
+
+ lo = t4_read_reg(padap, TP_PMM_TX_MAX_PAGE_A);
+ hi = t4_read_reg(padap, TP_PMM_TX_PAGE_SIZE_A);
+ meminfo_buff->tx_pages_data[0] = PMTXMAXPAGE_G(lo);
+ meminfo_buff->tx_pages_data[1] =
+ hi >= (1 << 20) ? (hi >> 20) : (hi >> 10);
+ meminfo_buff->tx_pages_data[2] =
+ hi >= (1 << 20) ? 'M' : 'K';
+ meminfo_buff->tx_pages_data[3] = 1 << PMTXNUMCHN_G(lo);
+
+ meminfo_buff->p_structs = t4_read_reg(padap, TP_CMM_MM_MAX_PSTRUCT_A);
+
+ for (i = 0; i < 4; i++) {
+ if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5)
+ lo = t4_read_reg(padap,
+ MPS_RX_MAC_BG_PG_CNT0_A + i * 4);
+ else
+ lo = t4_read_reg(padap, MPS_RX_PG_RSV0_A + i * 4);
+ if (is_t5(padap->params.chip)) {
+ used = T5_USED_G(lo);
+ alloc = T5_ALLOC_G(lo);
+ } else {
+ used = USED_G(lo);
+ alloc = ALLOC_G(lo);
+ }
+ meminfo_buff->port_used[i] = used;
+ meminfo_buff->port_alloc[i] = alloc;
+ }
+
+ for (i = 0; i < padap->params.arch.nchan; i++) {
+ if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5)
+ lo = t4_read_reg(padap,
+ MPS_RX_LPBK_BG_PG_CNT0_A + i * 4);
+ else
+ lo = t4_read_reg(padap, MPS_RX_PG_RSV4_A + i * 4);
+ if (is_t5(padap->params.chip)) {
+ used = T5_USED_G(lo);
+ alloc = T5_ALLOC_G(lo);
+ } else {
+ used = USED_G(lo);
+ alloc = ALLOC_G(lo);
+ }
+ meminfo_buff->loopback_used[i] = used;
+ meminfo_buff->loopback_alloc[i] = alloc;
+ }
+
+ return 0;
+}
+
int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err)
@@ -420,23 +693,211 @@ int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init,
return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 7);
}
+static int cudbg_meminfo_get_mem_index(struct adapter *padap,
+ struct cudbg_meminfo *mem_info,
+ u8 mem_type, u8 *idx)
+{
+ u8 i, flag;
+
+ switch (mem_type) {
+ case MEM_EDC0:
+ flag = EDC0_FLAG;
+ break;
+ case MEM_EDC1:
+ flag = EDC1_FLAG;
+ break;
+ case MEM_MC0:
+ /* Some T5 cards have both MC0 and MC1. */
+ flag = is_t5(padap->params.chip) ? MC0_FLAG : MC_FLAG;
+ break;
+ case MEM_MC1:
+ flag = MC1_FLAG;
+ break;
+ case MEM_HMA:
+ flag = HMA_FLAG;
+ break;
+ default:
+ return CUDBG_STATUS_ENTITY_NOT_FOUND;
+ }
+
+ for (i = 0; i < mem_info->avail_c; i++) {
+ if (mem_info->avail[i].idx == flag) {
+ *idx = i;
+ return 0;
+ }
+ }
+
+ return CUDBG_STATUS_ENTITY_NOT_FOUND;
+}
+
+/* Fetch the @region_name's start and end from @meminfo. */
+static int cudbg_get_mem_region(struct adapter *padap,
+ struct cudbg_meminfo *meminfo,
+ u8 mem_type, const char *region_name,
+ struct cudbg_mem_desc *mem_desc)
+{
+ u8 mc, found = 0;
+ u32 i, idx = 0;
+ int rc;
+
+ rc = cudbg_meminfo_get_mem_index(padap, meminfo, mem_type, &mc);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < ARRAY_SIZE(cudbg_region); i++) {
+ if (!strcmp(cudbg_region[i], region_name)) {
+ found = 1;
+ idx = i;
+ break;
+ }
+ }
+ if (!found)
+ return -EINVAL;
+
+ found = 0;
+ for (i = 0; i < meminfo->mem_c; i++) {
+ if (meminfo->mem[i].idx >= ARRAY_SIZE(cudbg_region))
+ continue; /* Skip holes */
+
+ if (!(meminfo->mem[i].limit))
+ meminfo->mem[i].limit =
+ i < meminfo->mem_c - 1 ?
+ meminfo->mem[i + 1].base - 1 : ~0;
+
+ if (meminfo->mem[i].idx == idx) {
+ /* Check if the region exists in @mem_type memory */
+ if (meminfo->mem[i].base < meminfo->avail[mc].base &&
+ meminfo->mem[i].limit < meminfo->avail[mc].base)
+ return -EINVAL;
+
+ if (meminfo->mem[i].base > meminfo->avail[mc].limit)
+ return -EINVAL;
+
+ memcpy(mem_desc, &meminfo->mem[i],
+ sizeof(struct cudbg_mem_desc));
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Fetch and update the start and end of the requested memory region w.r.t 0
+ * in the corresponding EDC/MC/HMA.
+ */
+static int cudbg_get_mem_relative(struct adapter *padap,
+ struct cudbg_meminfo *meminfo,
+ u8 mem_type, u32 *out_base, u32 *out_end)
+{
+ u8 mc_idx;
+ int rc;
+
+ rc = cudbg_meminfo_get_mem_index(padap, meminfo, mem_type, &mc_idx);
+ if (rc)
+ return rc;
+
+ if (*out_base < meminfo->avail[mc_idx].base)
+ *out_base = 0;
+ else
+ *out_base -= meminfo->avail[mc_idx].base;
+
+ if (*out_end > meminfo->avail[mc_idx].limit)
+ *out_end = meminfo->avail[mc_idx].limit;
+ else
+ *out_end -= meminfo->avail[mc_idx].base;
+
+ return 0;
+}
+
+/* Get TX and RX Payload region */
+static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
+ const char *region_name,
+ struct cudbg_region_info *payload)
+{
+ struct cudbg_mem_desc mem_desc = { 0 };
+ struct cudbg_meminfo meminfo;
+ int rc;
+
+ rc = cudbg_fill_meminfo(padap, &meminfo);
+ if (rc)
+ return rc;
+
+ rc = cudbg_get_mem_region(padap, &meminfo, mem_type, region_name,
+ &mem_desc);
+ if (rc) {
+ payload->exist = false;
+ return 0;
+ }
+
+ payload->exist = true;
+ payload->start = mem_desc.base;
+ payload->end = mem_desc.limit;
+
+ return cudbg_get_mem_relative(padap, &meminfo, mem_type,
+ &payload->start, &payload->end);
+}
+
+#define CUDBG_YIELD_ITERATION 256
+
static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff, u8 mem_type,
unsigned long tot_len,
struct cudbg_error *cudbg_err)
{
+ static const char * const region_name[] = { "Tx payload:",
+ "Rx payload:" };
unsigned long bytes, bytes_left, bytes_read = 0;
struct adapter *padap = pdbg_init->adap;
struct cudbg_buffer temp_buff = { 0 };
+ struct cudbg_region_info payload[2];
+ u32 yield_count = 0;
int rc = 0;
+ u8 i;
+
+ /* Get TX/RX Payload region range if they exist */
+ memset(payload, 0, sizeof(payload));
+ for (i = 0; i < ARRAY_SIZE(region_name); i++) {
+ rc = cudbg_get_payload_range(padap, mem_type, region_name[i],
+ &payload[i]);
+ if (rc)
+ return rc;
+
+ if (payload[i].exist) {
+ /* Align start and end to avoid wrap around */
+ payload[i].start = roundup(payload[i].start,
+ CUDBG_CHUNK_SIZE);
+ payload[i].end = rounddown(payload[i].end,
+ CUDBG_CHUNK_SIZE);
+ }
+ }
bytes_left = tot_len;
while (bytes_left > 0) {
+ /* As MC size is huge and read through PIO access, this
+ * loop will hold cpu for a longer time. OS may think that
+ * the process is hanged and will generate CPU stall traces.
+ * So yield the cpu regularly.
+ */
+ yield_count++;
+ if (!(yield_count % CUDBG_YIELD_ITERATION))
+ schedule();
+
bytes = min_t(unsigned long, bytes_left,
(unsigned long)CUDBG_CHUNK_SIZE);
rc = cudbg_get_buff(dbg_buff, bytes, &temp_buff);
if (rc)
return rc;
+
+ for (i = 0; i < ARRAY_SIZE(payload); i++)
+ if (payload[i].exist &&
+ bytes_read >= payload[i].start &&
+ bytes_read + bytes <= payload[i].end)
+ /* TX and RX Payload regions can't overlap */
+ goto skip_read;
+
spin_lock(&padap->win0_lock);
rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
bytes_read, bytes,
@@ -448,6 +909,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
cudbg_put_buff(&temp_buff, dbg_buff);
return rc;
}
+
+skip_read:
bytes_left -= bytes;
bytes_read += bytes;
cudbg_write_and_release_buff(&temp_buff, dbg_buff);
@@ -455,27 +918,6 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
return rc;
}
-static void cudbg_collect_mem_info(struct cudbg_init *pdbg_init,
- struct card_mem *mem_info)
-{
- struct adapter *padap = pdbg_init->adap;
- u32 value;
-
- value = t4_read_reg(padap, MA_EDRAM0_BAR_A);
- value = EDRAM0_SIZE_G(value);
- mem_info->size_edc0 = (u16)value;
-
- value = t4_read_reg(padap, MA_EDRAM1_BAR_A);
- value = EDRAM1_SIZE_G(value);
- mem_info->size_edc1 = (u16)value;
-
- value = t4_read_reg(padap, MA_TARGET_MEM_ENABLE_A);
- if (value & EDRAM0_ENABLE_F)
- mem_info->mem_flag |= (1 << EDC0_FLAG);
- if (value & EDRAM1_ENABLE_F)
- mem_info->mem_flag |= (1 << EDC1_FLAG);
-}
-
static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init,
struct cudbg_error *cudbg_err)
{
@@ -495,37 +937,25 @@ static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
struct cudbg_error *cudbg_err,
u8 mem_type)
{
- struct card_mem mem_info = {0};
- unsigned long flag, size;
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_meminfo mem_info;
+ unsigned long size;
+ u8 mc_idx;
int rc;
+ memset(&mem_info, 0, sizeof(struct cudbg_meminfo));
+ rc = cudbg_fill_meminfo(padap, &mem_info);
+ if (rc)
+ return rc;
+
cudbg_t4_fwcache(pdbg_init, cudbg_err);
- cudbg_collect_mem_info(pdbg_init, &mem_info);
- switch (mem_type) {
- case MEM_EDC0:
- flag = (1 << EDC0_FLAG);
- size = cudbg_mbytes_to_bytes(mem_info.size_edc0);
- break;
- case MEM_EDC1:
- flag = (1 << EDC1_FLAG);
- size = cudbg_mbytes_to_bytes(mem_info.size_edc1);
- break;
- default:
- rc = CUDBG_STATUS_ENTITY_NOT_FOUND;
- goto err;
- }
+ rc = cudbg_meminfo_get_mem_index(padap, &mem_info, mem_type, &mc_idx);
+ if (rc)
+ return rc;
- if (mem_info.mem_flag & flag) {
- rc = cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type,
- size, cudbg_err);
- if (rc)
- goto err;
- } else {
- rc = CUDBG_STATUS_ENTITY_NOT_FOUND;
- goto err;
- }
-err:
- return rc;
+ size = mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+ return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size,
+ cudbg_err);
}
int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init,
@@ -544,6 +974,30 @@ int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init,
MEM_EDC1);
}
+int cudbg_collect_mc0_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+ MEM_MC0);
+}
+
+int cudbg_collect_mc1_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+ MEM_MC1);
+}
+
+int cudbg_collect_hma_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+ MEM_HMA);
+}
+
int cudbg_collect_rss(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err)
@@ -843,6 +1297,31 @@ int cudbg_collect_tp_la(struct cudbg_init *pdbg_init,
return rc;
}
+int cudbg_collect_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct cudbg_meminfo *meminfo_buff;
+ int rc;
+
+ rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_meminfo), &temp_buff);
+ if (rc)
+ return rc;
+
+ meminfo_buff = (struct cudbg_meminfo *)temp_buff.data;
+ rc = cudbg_fill_meminfo(padap, meminfo_buff);
+ if (rc) {
+ cudbg_err->sys_err = rc;
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return rc;
+ }
+
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err)
@@ -1115,22 +1594,135 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
return rc;
}
-int cudbg_dump_context_size(struct adapter *padap)
+int cudbg_collect_pcie_config(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
{
- u32 value, size;
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ u32 size, *value, j;
+ int i, rc, n;
+
+ size = sizeof(u32) * CUDBG_NUM_PCIE_CONFIG_REGS;
+ n = sizeof(t5_pcie_config_array) / (2 * sizeof(u32));
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ value = (u32 *)temp_buff.data;
+ for (i = 0; i < n; i++) {
+ for (j = t5_pcie_config_array[i][0];
+ j <= t5_pcie_config_array[i][1]; j += 4) {
+ t4_hw_pci_read_cfg4(padap, j, value);
+ value++;
+ }
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+static int cudbg_sge_ctxt_check_valid(u32 *buf, int type)
+{
+ int index, bit, bit_pos = 0;
+
+ switch (type) {
+ case CTXT_EGRESS:
+ bit_pos = 176;
+ break;
+ case CTXT_INGRESS:
+ bit_pos = 141;
+ break;
+ case CTXT_FLM:
+ bit_pos = 89;
+ break;
+ }
+ index = bit_pos / 32;
+ bit = bit_pos % 32;
+ return buf[index] & (1U << bit);
+}
+
+static int cudbg_get_ctxt_region_info(struct adapter *padap,
+ struct cudbg_region_info *ctx_info,
+ u8 *mem_type)
+{
+ struct cudbg_mem_desc mem_desc;
+ struct cudbg_meminfo meminfo;
+ u32 i, j, value, found;
u8 flq;
+ int rc;
+ rc = cudbg_fill_meminfo(padap, &meminfo);
+ if (rc)
+ return rc;
+
+ /* Get EGRESS and INGRESS context region size */
+ for (i = CTXT_EGRESS; i <= CTXT_INGRESS; i++) {
+ found = 0;
+ memset(&mem_desc, 0, sizeof(struct cudbg_mem_desc));
+ for (j = 0; j < ARRAY_SIZE(meminfo.avail); j++) {
+ rc = cudbg_get_mem_region(padap, &meminfo, j,
+ cudbg_region[i],
+ &mem_desc);
+ if (!rc) {
+ found = 1;
+ rc = cudbg_get_mem_relative(padap, &meminfo, j,
+ &mem_desc.base,
+ &mem_desc.limit);
+ if (rc) {
+ ctx_info[i].exist = false;
+ break;
+ }
+ ctx_info[i].exist = true;
+ ctx_info[i].start = mem_desc.base;
+ ctx_info[i].end = mem_desc.limit;
+ mem_type[i] = j;
+ break;
+ }
+ }
+ if (!found)
+ ctx_info[i].exist = false;
+ }
+
+ /* Get FLM and CNM max qid. */
value = t4_read_reg(padap, SGE_FLM_CFG_A);
/* Get number of data freelist queues */
flq = HDRSTARTFLQ_G(value);
- size = CUDBG_MAX_FL_QIDS >> flq;
+ ctx_info[CTXT_FLM].exist = true;
+ ctx_info[CTXT_FLM].end = (CUDBG_MAX_FL_QIDS >> flq) * SGE_CTXT_SIZE;
- /* Add extra space for congestion manager contexts.
- * The number of CONM contexts are same as number of freelist
+ /* The number of CONM contexts are same as number of freelist
* queues.
*/
- size += size;
+ ctx_info[CTXT_CNM].exist = true;
+ ctx_info[CTXT_CNM].end = ctx_info[CTXT_FLM].end;
+
+ return 0;
+}
+
+int cudbg_dump_context_size(struct adapter *padap)
+{
+ struct cudbg_region_info region_info[CTXT_CNM + 1] = { {0} };
+ u8 mem_type[CTXT_INGRESS + 1] = { 0 };
+ u32 i, size = 0;
+ int rc;
+
+ /* Get max valid qid for each type of queue */
+ rc = cudbg_get_ctxt_region_info(padap, region_info, mem_type);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < CTXT_CNM; i++) {
+ if (!region_info[i].exist) {
+ if (i == CTXT_EGRESS || i == CTXT_INGRESS)
+ size += CUDBG_LOWMEM_MAX_CTXT_QIDS *
+ SGE_CTXT_SIZE;
+ continue;
+ }
+
+ size += (region_info[i].end - region_info[i].start + 1) /
+ SGE_CTXT_SIZE;
+ }
return size * sizeof(struct cudbg_ch_cntxt);
}
@@ -1153,16 +1745,54 @@ static void cudbg_read_sge_ctxt(struct cudbg_init *pdbg_init, u32 cid,
t4_sge_ctxt_rd_bd(padap, cid, ctype, data);
}
+static void cudbg_get_sge_ctxt_fw(struct cudbg_init *pdbg_init, u32 max_qid,
+ u8 ctxt_type,
+ struct cudbg_ch_cntxt **out_buff)
+{
+ struct cudbg_ch_cntxt *buff = *out_buff;
+ int rc;
+ u32 j;
+
+ for (j = 0; j < max_qid; j++) {
+ cudbg_read_sge_ctxt(pdbg_init, j, ctxt_type, buff->data);
+ rc = cudbg_sge_ctxt_check_valid(buff->data, ctxt_type);
+ if (!rc)
+ continue;
+
+ buff->cntxt_type = ctxt_type;
+ buff->cntxt_id = j;
+ buff++;
+ if (ctxt_type == CTXT_FLM) {
+ cudbg_read_sge_ctxt(pdbg_init, j, CTXT_CNM, buff->data);
+ buff->cntxt_type = CTXT_CNM;
+ buff->cntxt_id = j;
+ buff++;
+ }
+ }
+
+ *out_buff = buff;
+}
+
int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err)
{
+ struct cudbg_region_info region_info[CTXT_CNM + 1] = { {0} };
struct adapter *padap = pdbg_init->adap;
+ u32 j, size, max_ctx_size, max_ctx_qid;
+ u8 mem_type[CTXT_INGRESS + 1] = { 0 };
struct cudbg_buffer temp_buff = { 0 };
struct cudbg_ch_cntxt *buff;
- u32 size, i = 0;
+ u64 *dst_off, *src_off;
+ u8 *ctx_buf;
+ u8 i, k;
int rc;
+ /* Get max valid qid for each type of queue */
+ rc = cudbg_get_ctxt_region_info(padap, region_info, mem_type);
+ if (rc)
+ return rc;
+
rc = cudbg_dump_context_size(padap);
if (rc <= 0)
return CUDBG_STATUS_ENTITY_NOT_FOUND;
@@ -1172,23 +1802,79 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
if (rc)
return rc;
+ /* Get buffer with enough space to read the biggest context
+ * region in memory.
+ */
+ max_ctx_size = max(region_info[CTXT_EGRESS].end -
+ region_info[CTXT_EGRESS].start + 1,
+ region_info[CTXT_INGRESS].end -
+ region_info[CTXT_INGRESS].start + 1);
+
+ ctx_buf = kvzalloc(max_ctx_size, GFP_KERNEL);
+ if (!ctx_buf) {
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return -ENOMEM;
+ }
+
buff = (struct cudbg_ch_cntxt *)temp_buff.data;
- while (size > 0) {
- buff->cntxt_type = CTXT_FLM;
- buff->cntxt_id = i;
- cudbg_read_sge_ctxt(pdbg_init, i, CTXT_FLM, buff->data);
- buff++;
- size -= sizeof(struct cudbg_ch_cntxt);
- buff->cntxt_type = CTXT_CNM;
- buff->cntxt_id = i;
- cudbg_read_sge_ctxt(pdbg_init, i, CTXT_CNM, buff->data);
- buff++;
- size -= sizeof(struct cudbg_ch_cntxt);
+ /* Collect EGRESS and INGRESS context data.
+ * In case of failures, fallback to collecting via FW or
+ * backdoor access.
+ */
+ for (i = CTXT_EGRESS; i <= CTXT_INGRESS; i++) {
+ if (!region_info[i].exist) {
+ max_ctx_qid = CUDBG_LOWMEM_MAX_CTXT_QIDS;
+ cudbg_get_sge_ctxt_fw(pdbg_init, max_ctx_qid, i,
+ &buff);
+ continue;
+ }
- i++;
+ max_ctx_size = region_info[i].end - region_info[i].start + 1;
+ max_ctx_qid = max_ctx_size / SGE_CTXT_SIZE;
+
+ t4_sge_ctxt_flush(padap, padap->mbox, i);
+ rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type[i],
+ region_info[i].start, max_ctx_size,
+ (__be32 *)ctx_buf, 1);
+ if (rc) {
+ max_ctx_qid = CUDBG_LOWMEM_MAX_CTXT_QIDS;
+ cudbg_get_sge_ctxt_fw(pdbg_init, max_ctx_qid, i,
+ &buff);
+ continue;
+ }
+
+ for (j = 0; j < max_ctx_qid; j++) {
+ src_off = (u64 *)(ctx_buf + j * SGE_CTXT_SIZE);
+ dst_off = (u64 *)buff->data;
+
+ /* The data is stored in 64-bit cpu order. Convert it
+ * to big endian before parsing.
+ */
+ for (k = 0; k < SGE_CTXT_SIZE / sizeof(u64); k++)
+ dst_off[k] = cpu_to_be64(src_off[k]);
+
+ rc = cudbg_sge_ctxt_check_valid(buff->data, i);
+ if (!rc)
+ continue;
+
+ buff->cntxt_type = i;
+ buff->cntxt_id = j;
+ buff++;
+ }
}
+ kvfree(ctx_buf);
+
+ /* Collect FREELIST and CONGESTION MANAGER contexts */
+ max_ctx_size = region_info[CTXT_FLM].end -
+ region_info[CTXT_FLM].start + 1;
+ max_ctx_qid = max_ctx_size / SGE_CTXT_SIZE;
+ /* Since FLM and CONM are 1-to-1 mapped, the below function
+ * will fetch both FLM and CONM contexts.
+ */
+ cudbg_get_sge_ctxt_fw(pdbg_init, max_ctx_qid, CTXT_FLM, &buff);
+
cudbg_write_and_release_buff(&temp_buff, dbg_buff);
return rc;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
index caeee8e33e86..eebefe7cd18e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
@@ -75,6 +75,12 @@ int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init,
int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
+int cudbg_collect_mc0_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_mc1_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
int cudbg_collect_rss(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
@@ -102,6 +108,9 @@ int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init,
int cudbg_collect_tp_la(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
+int cudbg_collect_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
@@ -123,6 +132,9 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init,
int cudbg_collect_tid(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
+int cudbg_collect_pcie_config(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
@@ -156,6 +168,9 @@ int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init,
int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init,
struct cudbg_buffer *dbg_buff,
struct cudbg_error *cudbg_err);
+int cudbg_collect_hma_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i);
void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
@@ -163,7 +178,8 @@ void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
u32 cudbg_cim_obq_size(struct adapter *padap, int qid);
int cudbg_dump_context_size(struct adapter *padap);
-struct cudbg_tcam;
+int cudbg_fill_meminfo(struct adapter *padap,
+ struct cudbg_meminfo *meminfo_buff);
void cudbg_fill_le_tcam_info(struct adapter *padap,
struct cudbg_tcam *tcam_region);
#endif /* __CUDBG_LIB_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 6f9fa6e3c42a..b1df2aa6be94 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -77,7 +77,8 @@ enum {
MEM_EDC1,
MEM_MC,
MEM_MC0 = MEM_MC,
- MEM_MC1
+ MEM_MC1,
+ MEM_HMA,
};
enum {
@@ -1423,6 +1424,21 @@ static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
q->size = size;
}
+/**
+ * t4_is_inserted_mod_type - is a plugged in Firmware Module Type
+ * @fw_mod_type: the Firmware Mofule Type
+ *
+ * Return whether the Firmware Module Type represents a real Transceiver
+ * Module/Cable Module Type which has been inserted.
+ */
+static inline bool t4_is_inserted_mod_type(unsigned int fw_mod_type)
+{
+ return (fw_mod_type != FW_PORT_MOD_TYPE_NONE &&
+ fw_mod_type != FW_PORT_MOD_TYPE_NOTSUPPORTED &&
+ fw_mod_type != FW_PORT_MOD_TYPE_UNKNOWN &&
+ fw_mod_type != FW_PORT_MOD_TYPE_ERROR);
+}
+
void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
unsigned int data_reg, const u32 *vals,
unsigned int nregs, unsigned int start_idx);
@@ -1653,7 +1669,7 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
unsigned int vf, unsigned int eqid);
int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
unsigned int vf, unsigned int eqid);
-int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox);
+int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type);
void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl);
int t4_update_port_info(struct port_info *pi);
int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
@@ -1696,6 +1712,9 @@ void t4_uld_mem_free(struct adapter *adap);
int t4_uld_mem_alloc(struct adapter *adap);
void t4_uld_clean_up(struct adapter *adap);
void t4_register_netevent_notifier(void);
+int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
+ unsigned int devid, unsigned int offset,
+ unsigned int len, u8 *buf);
void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
void free_tx_desc(struct adapter *adap, struct sge_txq *q,
unsigned int n, bool unmap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 29cc625e9833..41c8736314f8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -18,11 +18,13 @@
#include "t4_regs.h"
#include "cxgb4.h"
#include "cxgb4_cudbg.h"
-#include "cudbg_entity.h"
static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = {
{ CUDBG_EDC0, cudbg_collect_edc0_meminfo },
{ CUDBG_EDC1, cudbg_collect_edc1_meminfo },
+ { CUDBG_MC0, cudbg_collect_mc0_meminfo },
+ { CUDBG_MC1, cudbg_collect_mc1_meminfo },
+ { CUDBG_HMA, cudbg_collect_hma_meminfo },
};
static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = {
@@ -53,6 +55,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = {
{ CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect },
{ CUDBG_ULPRX_LA, cudbg_collect_ulprx_la },
{ CUDBG_TP_LA, cudbg_collect_tp_la },
+ { CUDBG_MEMINFO, cudbg_collect_meminfo },
{ CUDBG_CIM_PIF_LA, cudbg_collect_cim_pif_la },
{ CUDBG_CLK, cudbg_collect_clk_info },
{ CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 },
@@ -60,6 +63,7 @@ static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = {
{ CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect },
{ CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect },
{ CUDBG_TID_INFO, cudbg_collect_tid },
+ { CUDBG_PCIE_CONFIG, cudbg_collect_pcie_config },
{ CUDBG_DUMP_CONTEXT, cudbg_collect_dump_context },
{ CUDBG_MPS_TCAM, cudbg_collect_mps_tcam },
{ CUDBG_VPD_DATA, cudbg_collect_vpd_data },
@@ -158,6 +162,22 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
}
len = cudbg_mbytes_to_bytes(len);
break;
+ case CUDBG_MC0:
+ value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+ if (value & EXT_MEM0_ENABLE_F) {
+ value = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
+ len = EXT_MEM0_SIZE_G(value);
+ }
+ len = cudbg_mbytes_to_bytes(len);
+ break;
+ case CUDBG_MC1:
+ value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+ if (value & EXT_MEM1_ENABLE_F) {
+ value = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ len = EXT_MEM1_SIZE_G(value);
+ }
+ len = cudbg_mbytes_to_bytes(len);
+ break;
case CUDBG_RSS:
len = RSS_NENTRIES * sizeof(u16);
break;
@@ -201,6 +221,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
case CUDBG_TP_LA:
len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64);
break;
+ case CUDBG_MEMINFO:
+ len = sizeof(struct cudbg_meminfo);
+ break;
case CUDBG_CIM_PIF_LA:
len = sizeof(struct cudbg_cim_pif_la);
len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32);
@@ -219,6 +242,9 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
case CUDBG_TID_INFO:
len = sizeof(struct cudbg_tid_info_region_rev1);
break;
+ case CUDBG_PCIE_CONFIG:
+ len = sizeof(u32) * CUDBG_NUM_PCIE_CONFIG_REGS;
+ break;
case CUDBG_DUMP_CONTEXT:
len = cudbg_dump_context_size(adap);
break;
@@ -264,6 +290,17 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
len = sizeof(struct ireg_buf) * n;
}
break;
+ case CUDBG_HMA:
+ value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+ if (value & HMA_MUX_F) {
+ /* In T6, there's no MC1. So, HMA shares MC1
+ * address space.
+ */
+ value = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+ len = EXT_MEM1_SIZE_G(value);
+ }
+ len = cudbg_mbytes_to_bytes(len);
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
index c099b5aa2214..7ceeb0bc9fa8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
@@ -20,6 +20,7 @@
#include "cudbg_if.h"
#include "cudbg_lib_common.h"
+#include "cudbg_entity.h"
#include "cudbg_lib.h"
typedef int (*cudbg_collect_callback_t)(struct cudbg_init *pdbg_init,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 917663b35603..4956e429ae1d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -45,6 +45,10 @@
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_entity.h"
+#include "cudbg_lib.h"
/* generic seq_file support for showing a table of size rows x width. */
static void *seq_tab_get_idx(struct seq_tab *tb, loff_t pos)
@@ -2794,18 +2798,6 @@ static const struct file_operations blocked_fl_fops = {
.llseek = generic_file_llseek,
};
-struct mem_desc {
- unsigned int base;
- unsigned int limit;
- unsigned int idx;
-};
-
-static int mem_desc_cmp(const void *a, const void *b)
-{
- return ((const struct mem_desc *)a)->base -
- ((const struct mem_desc *)b)->base;
-}
-
static void mem_region_show(struct seq_file *seq, const char *name,
unsigned int from, unsigned int to)
{
@@ -2819,250 +2811,60 @@ static void mem_region_show(struct seq_file *seq, const char *name,
static int meminfo_show(struct seq_file *seq, void *v)
{
static const char * const memory[] = { "EDC0:", "EDC1:", "MC:",
- "MC0:", "MC1:"};
- static const char * const region[] = {
- "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
- "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
- "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
- "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
- "RQUDP region:", "PBL region:", "TXPBL region:",
- "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
- "On-chip queues:"
- };
-
- int i, n;
- u32 lo, hi, used, alloc;
- struct mem_desc avail[4];
- struct mem_desc mem[ARRAY_SIZE(region) + 3]; /* up to 3 holes */
- struct mem_desc *md = mem;
+ "MC0:", "MC1:", "HMA:"};
struct adapter *adap = seq->private;
+ struct cudbg_meminfo meminfo;
+ int i, rc;
- for (i = 0; i < ARRAY_SIZE(mem); i++) {
- mem[i].limit = 0;
- mem[i].idx = i;
- }
-
- /* Find and sort the populated memory ranges */
- i = 0;
- lo = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
- if (lo & EDRAM0_ENABLE_F) {
- hi = t4_read_reg(adap, MA_EDRAM0_BAR_A);
- avail[i].base = EDRAM0_BASE_G(hi) << 20;
- avail[i].limit = avail[i].base + (EDRAM0_SIZE_G(hi) << 20);
- avail[i].idx = 0;
- i++;
- }
- if (lo & EDRAM1_ENABLE_F) {
- hi = t4_read_reg(adap, MA_EDRAM1_BAR_A);
- avail[i].base = EDRAM1_BASE_G(hi) << 20;
- avail[i].limit = avail[i].base + (EDRAM1_SIZE_G(hi) << 20);
- avail[i].idx = 1;
- i++;
- }
-
- if (is_t5(adap->params.chip)) {
- if (lo & EXT_MEM0_ENABLE_F) {
- hi = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
- avail[i].base = EXT_MEM0_BASE_G(hi) << 20;
- avail[i].limit =
- avail[i].base + (EXT_MEM0_SIZE_G(hi) << 20);
- avail[i].idx = 3;
- i++;
- }
- if (lo & EXT_MEM1_ENABLE_F) {
- hi = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
- avail[i].base = EXT_MEM1_BASE_G(hi) << 20;
- avail[i].limit =
- avail[i].base + (EXT_MEM1_SIZE_G(hi) << 20);
- avail[i].idx = 4;
- i++;
- }
- } else {
- if (lo & EXT_MEM_ENABLE_F) {
- hi = t4_read_reg(adap, MA_EXT_MEMORY_BAR_A);
- avail[i].base = EXT_MEM_BASE_G(hi) << 20;
- avail[i].limit =
- avail[i].base + (EXT_MEM_SIZE_G(hi) << 20);
- avail[i].idx = 2;
- i++;
- }
- }
- if (!i) /* no memory available */
- return 0;
- sort(avail, i, sizeof(struct mem_desc), mem_desc_cmp, NULL);
-
- (md++)->base = t4_read_reg(adap, SGE_DBQ_CTXT_BADDR_A);
- (md++)->base = t4_read_reg(adap, SGE_IMSG_CTXT_BADDR_A);
- (md++)->base = t4_read_reg(adap, SGE_FLM_CACHE_BADDR_A);
- (md++)->base = t4_read_reg(adap, TP_CMM_TCB_BASE_A);
- (md++)->base = t4_read_reg(adap, TP_CMM_MM_BASE_A);
- (md++)->base = t4_read_reg(adap, TP_CMM_TIMER_BASE_A);
- (md++)->base = t4_read_reg(adap, TP_CMM_MM_RX_FLST_BASE_A);
- (md++)->base = t4_read_reg(adap, TP_CMM_MM_TX_FLST_BASE_A);
- (md++)->base = t4_read_reg(adap, TP_CMM_MM_PS_FLST_BASE_A);
-
- /* the next few have explicit upper bounds */
- md->base = t4_read_reg(adap, TP_PMM_TX_BASE_A);
- md->limit = md->base - 1 +
- t4_read_reg(adap, TP_PMM_TX_PAGE_SIZE_A) *
- PMTXMAXPAGE_G(t4_read_reg(adap, TP_PMM_TX_MAX_PAGE_A));
- md++;
-
- md->base = t4_read_reg(adap, TP_PMM_RX_BASE_A);
- md->limit = md->base - 1 +
- t4_read_reg(adap, TP_PMM_RX_PAGE_SIZE_A) *
- PMRXMAXPAGE_G(t4_read_reg(adap, TP_PMM_RX_MAX_PAGE_A));
- md++;
-
- if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) {
- if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) {
- hi = t4_read_reg(adap, LE_DB_TID_HASHBASE_A) / 4;
- md->base = t4_read_reg(adap, LE_DB_HASH_TID_BASE_A);
- } else {
- hi = t4_read_reg(adap, LE_DB_HASH_TID_BASE_A);
- md->base = t4_read_reg(adap,
- LE_DB_HASH_TBL_BASE_ADDR_A);
- }
- md->limit = 0;
- } else {
- md->base = 0;
- md->idx = ARRAY_SIZE(region); /* hide it */
- }
- md++;
-
-#define ulp_region(reg) do { \
- md->base = t4_read_reg(adap, ULP_ ## reg ## _LLIMIT_A);\
- (md++)->limit = t4_read_reg(adap, ULP_ ## reg ## _ULIMIT_A); \
-} while (0)
-
- ulp_region(RX_ISCSI);
- ulp_region(RX_TDDP);
- ulp_region(TX_TPT);
- ulp_region(RX_STAG);
- ulp_region(RX_RQ);
- ulp_region(RX_RQUDP);
- ulp_region(RX_PBL);
- ulp_region(TX_PBL);
-#undef ulp_region
- md->base = 0;
- md->idx = ARRAY_SIZE(region);
- if (!is_t4(adap->params.chip)) {
- u32 size = 0;
- u32 sge_ctrl = t4_read_reg(adap, SGE_CONTROL2_A);
- u32 fifo_size = t4_read_reg(adap, SGE_DBVFIFO_SIZE_A);
-
- if (is_t5(adap->params.chip)) {
- if (sge_ctrl & VFIFO_ENABLE_F)
- size = DBVFIFO_SIZE_G(fifo_size);
- } else {
- size = T6_DBVFIFO_SIZE_G(fifo_size);
- }
-
- if (size) {
- md->base = BASEADDR_G(t4_read_reg(adap,
- SGE_DBVFIFO_BADDR_A));
- md->limit = md->base + (size << 2) - 1;
- }
- }
-
- md++;
-
- md->base = t4_read_reg(adap, ULP_RX_CTX_BASE_A);
- md->limit = 0;
- md++;
- md->base = t4_read_reg(adap, ULP_TX_ERR_TABLE_BASE_A);
- md->limit = 0;
- md++;
-
- md->base = adap->vres.ocq.start;
- if (adap->vres.ocq.size)
- md->limit = md->base + adap->vres.ocq.size - 1;
- else
- md->idx = ARRAY_SIZE(region); /* hide it */
- md++;
-
- /* add any address-space holes, there can be up to 3 */
- for (n = 0; n < i - 1; n++)
- if (avail[n].limit < avail[n + 1].base)
- (md++)->base = avail[n].limit;
- if (avail[n].limit)
- (md++)->base = avail[n].limit;
-
- n = md - mem;
- sort(mem, n, sizeof(struct mem_desc), mem_desc_cmp, NULL);
+ memset(&meminfo, 0, sizeof(struct cudbg_meminfo));
+ rc = cudbg_fill_meminfo(adap, &meminfo);
+ if (rc)
+ return -ENXIO;
- for (lo = 0; lo < i; lo++)
- mem_region_show(seq, memory[avail[lo].idx], avail[lo].base,
- avail[lo].limit - 1);
+ for (i = 0; i < meminfo.avail_c; i++)
+ mem_region_show(seq, memory[meminfo.avail[i].idx],
+ meminfo.avail[i].base,
+ meminfo.avail[i].limit - 1);
seq_putc(seq, '\n');
- for (i = 0; i < n; i++) {
- if (mem[i].idx >= ARRAY_SIZE(region))
+ for (i = 0; i < meminfo.mem_c; i++) {
+ if (meminfo.mem[i].idx >= ARRAY_SIZE(cudbg_region))
continue; /* skip holes */
- if (!mem[i].limit)
- mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
- mem_region_show(seq, region[mem[i].idx], mem[i].base,
- mem[i].limit);
+ if (!meminfo.mem[i].limit)
+ meminfo.mem[i].limit =
+ i < meminfo.mem_c - 1 ?
+ meminfo.mem[i + 1].base - 1 : ~0;
+ mem_region_show(seq, cudbg_region[meminfo.mem[i].idx],
+ meminfo.mem[i].base, meminfo.mem[i].limit);
}
seq_putc(seq, '\n');
- lo = t4_read_reg(adap, CIM_SDRAM_BASE_ADDR_A);
- hi = t4_read_reg(adap, CIM_SDRAM_ADDR_SIZE_A) + lo - 1;
- mem_region_show(seq, "uP RAM:", lo, hi);
+ mem_region_show(seq, "uP RAM:", meminfo.up_ram_lo, meminfo.up_ram_hi);
+ mem_region_show(seq, "uP Extmem2:", meminfo.up_extmem2_lo,
+ meminfo.up_extmem2_hi);
- lo = t4_read_reg(adap, CIM_EXTMEM2_BASE_ADDR_A);
- hi = t4_read_reg(adap, CIM_EXTMEM2_ADDR_SIZE_A) + lo - 1;
- mem_region_show(seq, "uP Extmem2:", lo, hi);
-
- lo = t4_read_reg(adap, TP_PMM_RX_MAX_PAGE_A);
seq_printf(seq, "\n%u Rx pages of size %uKiB for %u channels\n",
- PMRXMAXPAGE_G(lo),
- t4_read_reg(adap, TP_PMM_RX_PAGE_SIZE_A) >> 10,
- (lo & PMRXNUMCHN_F) ? 2 : 1);
+ meminfo.rx_pages_data[0], meminfo.rx_pages_data[1],
+ meminfo.rx_pages_data[2]);
- lo = t4_read_reg(adap, TP_PMM_TX_MAX_PAGE_A);
- hi = t4_read_reg(adap, TP_PMM_TX_PAGE_SIZE_A);
seq_printf(seq, "%u Tx pages of size %u%ciB for %u channels\n",
- PMTXMAXPAGE_G(lo),
- hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
- hi >= (1 << 20) ? 'M' : 'K', 1 << PMTXNUMCHN_G(lo));
- seq_printf(seq, "%u p-structs\n\n",
- t4_read_reg(adap, TP_CMM_MM_MAX_PSTRUCT_A));
-
- for (i = 0; i < 4; i++) {
- if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5)
- lo = t4_read_reg(adap, MPS_RX_MAC_BG_PG_CNT0_A + i * 4);
- else
- lo = t4_read_reg(adap, MPS_RX_PG_RSV0_A + i * 4);
- if (is_t5(adap->params.chip)) {
- used = T5_USED_G(lo);
- alloc = T5_ALLOC_G(lo);
- } else {
- used = USED_G(lo);
- alloc = ALLOC_G(lo);
- }
+ meminfo.tx_pages_data[0], meminfo.tx_pages_data[1],
+ meminfo.tx_pages_data[2], meminfo.tx_pages_data[3]);
+
+ seq_printf(seq, "%u p-structs\n\n", meminfo.p_structs);
+
+ for (i = 0; i < 4; i++)
/* For T6 these are MAC buffer groups */
seq_printf(seq, "Port %d using %u pages out of %u allocated\n",
- i, used, alloc);
- }
- for (i = 0; i < adap->params.arch.nchan; i++) {
- if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5)
- lo = t4_read_reg(adap,
- MPS_RX_LPBK_BG_PG_CNT0_A + i * 4);
- else
- lo = t4_read_reg(adap, MPS_RX_PG_RSV4_A + i * 4);
- if (is_t5(adap->params.chip)) {
- used = T5_USED_G(lo);
- alloc = T5_ALLOC_G(lo);
- } else {
- used = USED_G(lo);
- alloc = ALLOC_G(lo);
- }
+ i, meminfo.port_used[i], meminfo.port_alloc[i]);
+
+ for (i = 0; i < adap->params.arch.nchan; i++)
/* For T6 these are MAC buffer groups */
seq_printf(seq,
"Loopback %d using %u pages out of %u allocated\n",
- i, used, alloc);
- }
+ i, meminfo.loopback_used[i],
+ meminfo.loopback_alloc[i]);
+
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index eb338212f5af..541419b084ac 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1396,6 +1396,101 @@ static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump,
return 0;
}
+static int cxgb4_get_module_info(struct net_device *dev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct port_info *pi = netdev_priv(dev);
+ u8 sff8472_comp, sff_diag_type, sff_rev;
+ struct adapter *adapter = pi->adapter;
+ int ret;
+
+ if (!t4_is_inserted_mod_type(pi->mod_type))
+ return -EINVAL;
+
+ switch (pi->port_type) {
+ case FW_PORT_TYPE_SFP:
+ case FW_PORT_TYPE_QSA:
+ case FW_PORT_TYPE_SFP28:
+ ret = t4_i2c_rd(adapter, adapter->mbox, pi->tx_chan,
+ I2C_DEV_ADDR_A0, SFF_8472_COMP_ADDR,
+ SFF_8472_COMP_LEN, &sff8472_comp);
+ if (ret)
+ return ret;
+ ret = t4_i2c_rd(adapter, adapter->mbox, pi->tx_chan,
+ I2C_DEV_ADDR_A0, SFP_DIAG_TYPE_ADDR,
+ SFP_DIAG_TYPE_LEN, &sff_diag_type);
+ if (ret)
+ return ret;
+
+ if (!sff8472_comp || (sff_diag_type & 4)) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ }
+ break;
+
+ case FW_PORT_TYPE_QSFP:
+ case FW_PORT_TYPE_QSFP_10G:
+ case FW_PORT_TYPE_CR_QSFP:
+ case FW_PORT_TYPE_CR2_QSFP:
+ case FW_PORT_TYPE_CR4_QSFP:
+ ret = t4_i2c_rd(adapter, adapter->mbox, pi->tx_chan,
+ I2C_DEV_ADDR_A0, SFF_REV_ADDR,
+ SFF_REV_LEN, &sff_rev);
+ /* For QSFP type ports, revision value >= 3
+ * means the SFP is 8636 compliant.
+ */
+ if (ret)
+ return ret;
+ if (sff_rev >= 0x3) {
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ }
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cxgb4_get_module_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *eprom, u8 *data)
+{
+ int ret = 0, offset = eprom->offset, len = eprom->len;
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adapter = pi->adapter;
+
+ memset(data, 0, eprom->len);
+ if (offset + len <= I2C_PAGE_SIZE)
+ return t4_i2c_rd(adapter, adapter->mbox, pi->tx_chan,
+ I2C_DEV_ADDR_A0, offset, len, data);
+
+ /* offset + len spans 0xa0 and 0xa1 pages */
+ if (offset <= I2C_PAGE_SIZE) {
+ /* read 0xa0 page */
+ len = I2C_PAGE_SIZE - offset;
+ ret = t4_i2c_rd(adapter, adapter->mbox, pi->tx_chan,
+ I2C_DEV_ADDR_A0, offset, len, data);
+ if (ret)
+ return ret;
+ offset = I2C_PAGE_SIZE;
+ /* Remaining bytes to be read from second page =
+ * Total length - bytes read from first page
+ */
+ len = eprom->len - len;
+ }
+ /* Read additional optical diagnostics from page 0xa2 if supported */
+ return t4_i2c_rd(adapter, adapter->mbox, pi->tx_chan, I2C_DEV_ADDR_A2,
+ offset, len, &data[eprom->len - len]);
+}
+
static const struct ethtool_ops cxgb_ethtool_ops = {
.get_link_ksettings = get_link_ksettings,
.set_link_ksettings = set_link_ksettings,
@@ -1430,6 +1525,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
.set_dump = set_dump,
.get_dump_flag = get_dump_flag,
.get_dump_data = get_dump_data,
+ .get_module_info = cxgb4_get_module_info,
+ .get_module_eeprom = cxgb4_get_module_eeprom,
};
void cxgb4_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 6f900ffe25cc..87ac1e4dafc1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1673,7 +1673,7 @@ int cxgb4_flush_eq_cache(struct net_device *dev)
{
struct adapter *adap = netdev2adap(dev);
- return t4_sge_ctxt_flush(adap, adap->mbox);
+ return t4_sge_ctxt_flush(adap, adap->mbox, CTXT_EGRESS);
}
EXPORT_SYMBOL(cxgb4_flush_eq_cache);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index d4a548a6a55c..9b9f3f99b39d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -405,9 +405,7 @@ static void cxgb4_process_flow_actions(struct net_device *in,
} else if (is_tcf_gact_shot(a)) {
fs->action = FILTER_DROP;
} else if (is_tcf_mirred_egress_redirect(a)) {
- int ifindex = tcf_mirred_ifindex(a);
- struct net_device *out = __dev_get_by_index(dev_net(in),
- ifindex);
+ struct net_device *out = tcf_mirred_dev(a);
struct port_info *pi = netdev_priv(out);
fs->action = FILTER_SWITCH;
@@ -582,14 +580,14 @@ static int cxgb4_validate_flow_actions(struct net_device *dev,
/* Do nothing */
} else if (is_tcf_mirred_egress_redirect(a)) {
struct adapter *adap = netdev2adap(dev);
- struct net_device *n_dev;
- unsigned int i, ifindex;
+ struct net_device *n_dev, *target_dev;
+ unsigned int i;
bool found = false;
- ifindex = tcf_mirred_ifindex(a);
+ target_dev = tcf_mirred_dev(a);
for_each_port(adap, i) {
n_dev = adap->port[i];
- if (ifindex == n_dev->ifindex) {
+ if (target_dev == n_dev) {
found = true;
break;
}
@@ -765,9 +763,7 @@ static void ch_flower_stats_handler(struct work_struct *work)
rhashtable_walk_enter(&adap->flower_tbl, &iter);
do {
- flower_entry = ERR_PTR(rhashtable_walk_start(&iter));
- if (IS_ERR(flower_entry))
- goto walk_stop;
+ rhashtable_walk_start(&iter);
while ((flower_entry = rhashtable_walk_next(&iter)) &&
!IS_ERR(flower_entry)) {
@@ -786,8 +782,9 @@ static void ch_flower_stats_handler(struct work_struct *work)
spin_unlock(&flower_entry->lock);
}
}
-walk_stop:
+
rhashtable_walk_stop(&iter);
+
} while (flower_entry == ERR_PTR(-EAGAIN));
rhashtable_walk_exit(&iter);
mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index cd0cd13a964d..ab174bcfbfb0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -114,14 +114,14 @@ static int fill_action_fields(struct adapter *adap,
/* Re-direct to specified port in hardware. */
if (is_tcf_mirred_egress_redirect(a)) {
- struct net_device *n_dev;
- unsigned int i, index;
+ struct net_device *n_dev, *target_dev;
bool found = false;
+ unsigned int i;
- index = tcf_mirred_ifindex(a);
+ target_dev = tcf_mirred_dev(a);
for_each_port(adap, i) {
n_dev = adap->port[i];
- if (index == n_dev->ifindex) {
+ if (target_dev == n_dev) {
fs->action = FILTER_SWITCH;
fs->eport = i;
found = true;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index f63210f15579..f044717aaa10 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -524,11 +524,14 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
* MEM_EDC1 = 1
* MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
* MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
+ * MEM_HMA = 4
*/
edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
- if (mtype != MEM_MC1)
+ if (mtype == MEM_HMA) {
+ memoffset = 2 * (edc_size * 1024 * 1024);
+ } else if (mtype != MEM_MC1) {
memoffset = (mtype * (edc_size * 1024 * 1024));
- else {
+ } else {
mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
MA_EXT_MEMORY0_BAR_A));
memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
@@ -6527,18 +6530,21 @@ void t4_sge_decode_idma_state(struct adapter *adapter, int state)
* t4_sge_ctxt_flush - flush the SGE context cache
* @adap: the adapter
* @mbox: mailbox to use for the FW command
+ * @ctx_type: Egress or Ingress
*
* Issues a FW command through the given mailbox to flush the
* SGE context cache.
*/
-int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox)
+int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type)
{
int ret;
u32 ldst_addrspace;
struct fw_ldst_cmd c;
memset(&c, 0, sizeof(c));
- ldst_addrspace = FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_SGE_EGRC);
+ ldst_addrspace = FW_LDST_CMD_ADDRSPACE_V(ctxt_type == CTXT_EGRESS ?
+ FW_LDST_ADDRSPC_SGE_EGRC :
+ FW_LDST_ADDRSPC_SGE_INGC);
c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) |
FW_CMD_REQUEST_F | FW_CMD_READ_F |
ldst_addrspace);
@@ -9737,3 +9743,59 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
NULL, 1);
}
+
+/**
+ * t4_i2c_rd - read I2C data from adapter
+ * @adap: the adapter
+ * @port: Port number if per-port device; <0 if not
+ * @devid: per-port device ID or absolute device ID
+ * @offset: byte offset into device I2C space
+ * @len: byte length of I2C space data
+ * @buf: buffer in which to return I2C data
+ *
+ * Reads the I2C data from the indicated device and location.
+ */
+int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
+ unsigned int devid, unsigned int offset,
+ unsigned int len, u8 *buf)
+{
+ struct fw_ldst_cmd ldst_cmd, ldst_rpl;
+ unsigned int i2c_max = sizeof(ldst_cmd.u.i2c.data);
+ int ret = 0;
+
+ if (len > I2C_PAGE_SIZE)
+ return -EINVAL;
+
+ /* Dont allow reads that spans multiple pages */
+ if (offset < I2C_PAGE_SIZE && offset + len > I2C_PAGE_SIZE)
+ return -EINVAL;
+
+ memset(&ldst_cmd, 0, sizeof(ldst_cmd));
+ ldst_cmd.op_to_addrspace =
+ cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_READ_F |
+ FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_I2C));
+ ldst_cmd.cycles_to_len16 = cpu_to_be32(FW_LEN16(ldst_cmd));
+ ldst_cmd.u.i2c.pid = (port < 0 ? 0xff : port);
+ ldst_cmd.u.i2c.did = devid;
+
+ while (len > 0) {
+ unsigned int i2c_len = (len < i2c_max) ? len : i2c_max;
+
+ ldst_cmd.u.i2c.boffset = offset;
+ ldst_cmd.u.i2c.blen = i2c_len;
+
+ ret = t4_wr_mbox(adap, mbox, &ldst_cmd, sizeof(ldst_cmd),
+ &ldst_rpl);
+ if (ret)
+ break;
+
+ memcpy(buf, ldst_rpl.u.i2c.data, i2c_len);
+ offset += i2c_len;
+ buf += i2c_len;
+ len -= i2c_len;
+ }
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index a964ed184356..872a91b1930c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -70,7 +70,9 @@ enum {
/* SGE context types */
enum ctxt_type {
- CTXT_FLM = 2,
+ CTXT_EGRESS,
+ CTXT_INGRESS,
+ CTXT_FLM,
CTXT_CNM,
};
@@ -284,4 +286,14 @@ enum {
#define SGE_TIMESTAMP_V(x) ((__u64)(x) << SGE_TIMESTAMP_S)
#define SGE_TIMESTAMP_G(x) (((__u64)(x) >> SGE_TIMESTAMP_S) & SGE_TIMESTAMP_M)
+#define I2C_DEV_ADDR_A0 0xa0
+#define I2C_DEV_ADDR_A2 0xa2
+#define I2C_PAGE_SIZE 0x100
+#define SFP_DIAG_TYPE_ADDR 0x5c
+#define SFP_DIAG_TYPE_LEN 0x1
+#define SFF_8472_COMP_ADDR 0x5e
+#define SFF_8472_COMP_LEN 0x1
+#define SFF_REV_ADDR 0x1
+#define SFF_REV_LEN 0x1
+
#endif /* __T4_HW_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index a7cfece72828..f6701e0a6701 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -961,6 +961,10 @@
#define MA_EXT_MEMORY1_BAR_A 0x7808
+#define HMA_MUX_S 5
+#define HMA_MUX_V(x) ((x) << HMA_MUX_S)
+#define HMA_MUX_F HMA_MUX_V(1U)
+
#define EXT_MEM1_BASE_S 16
#define EXT_MEM1_BASE_M 0xfffU
#define EXT_MEM1_BASE_G(x) (((x) >> EXT_MEM1_BASE_S) & EXT_MEM1_BASE_M)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 57eb4ad3485d..01f5a5ec16fa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -828,6 +828,7 @@ enum fw_ldst_addrspc {
FW_LDST_ADDRSPC_MPS = 0x0020,
FW_LDST_ADDRSPC_FUNC = 0x0028,
FW_LDST_ADDRSPC_FUNC_PCIE = 0x0029,
+ FW_LDST_ADDRSPC_I2C = 0x0038,
};
enum fw_ldst_mps_fid {
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 462d0ce51240..efb9333c7cf8 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -18,6 +18,7 @@
#include <linux/netdevice.h>
#include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
#include "enic_res.h"
#include "enic.h"
@@ -578,6 +579,16 @@ static int enic_set_rxfh(struct net_device *netdev, const u32 *indir,
return __enic_set_rsskey(enic);
}
+static int enic_get_ts_info(struct net_device *netdev,
+ struct ethtool_ts_info *info)
+{
+ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+
+ return 0;
+}
+
static const struct ethtool_ops enic_ethtool_ops = {
.get_drvinfo = enic_get_drvinfo,
.get_msglevel = enic_get_msglevel,
@@ -597,6 +608,7 @@ static const struct ethtool_ops enic_ethtool_ops = {
.get_rxfh = enic_get_rxfh,
.set_rxfh = enic_set_rxfh,
.get_link_ksettings = enic_get_ksettings,
+ .get_ts_info = enic_get_ts_info,
};
void enic_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index e130fb757e7b..d98676e43e03 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -856,6 +856,7 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
netif_tx_stop_queue(txq);
+ skb_tx_timestamp(skb);
if (!skb->xmit_more || netif_xmit_stopped(txq))
vnic_wq_doorbell(wq);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 610573855213..2d1b06579c1a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1862,6 +1862,8 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
ret = clk_prepare_enable(fep->clk_ref);
if (ret)
goto failed_clk_ref;
+
+ phy_reset_after_clk_enable(ndev->phydev);
} else {
clk_disable_unprepare(fep->clk_ahb);
clk_disable_unprepare(fep->clk_enet_out);
@@ -2834,6 +2836,7 @@ fec_enet_open(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
int ret;
+ bool reset_again;
ret = pm_runtime_get_sync(&fep->pdev->dev);
if (ret < 0)
@@ -2844,6 +2847,17 @@ fec_enet_open(struct net_device *ndev)
if (ret)
goto clk_enable;
+ /* During the first fec_enet_open call the PHY isn't probed at this
+ * point. Therefore the phy_reset_after_clk_enable() call within
+ * fec_enet_clk_enable() fails. As we need this reset in order to be
+ * sure the PHY is working correctly we check if we need to reset again
+ * later when the PHY is probed
+ */
+ if (ndev->phydev && ndev->phydev->drv)
+ reset_again = false;
+ else
+ reset_again = true;
+
/* I should reset the ring buffers here, but I don't yet know
* a simple way to do that.
*/
@@ -2860,6 +2874,12 @@ fec_enet_open(struct net_device *ndev)
if (ret)
goto err_enet_mii_probe;
+ /* Call phy_reset_after_clk_enable() again if it failed during
+ * phy_reset_after_clk_enable() before because the PHY wasn't probed.
+ */
+ if (reset_again)
+ phy_reset_after_clk_enable(ndev->phydev);
+
if (fep->quirks & FEC_QUIRK_ERR006687)
imx6q_cpuidle_fec_irqs_used();
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 30000b6aa7b8..8bcf470ff5f3 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -94,15 +94,6 @@ config HNS3_HCLGE
compatibility layer. The engine would be used in Hisilicon hip08 family of
SoCs and further upcoming SoCs.
-config HNS3_ENET
- tristate "Hisilicon HNS3 Ethernet Device Support"
- depends on 64BIT && PCI
- depends on HNS3 && HNS3_HCLGE
- ---help---
- This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
- family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
- devices and their associated operations.
-
config HNS3_DCB
bool "Hisilicon HNS3 Data Center Bridge Support"
default n
@@ -112,4 +103,23 @@ config HNS3_DCB
If unsure, say N.
+config HNS3_HCLGEVF
+ tristate "Hisilicon HNS3VF Acceleration Engine & Compatibility Layer Support"
+ depends on PCI_MSI
+ depends on HNS3
+ depends on HNS3_HCLGE
+ ---help---
+ This selects the HNS3 VF drivers network acceleration engine & its hardware
+ compatibility layer. The engine would be used in Hisilicon hip08 family of
+ SoCs and further upcoming SoCs.
+
+config HNS3_ENET
+ tristate "Hisilicon HNS3 Ethernet Device Support"
+ depends on 64BIT && PCI
+ depends on HNS3
+ ---help---
+ This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
+ family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
+ devices and their associated operations.
+
endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile
index a9349e1f3e51..002534f12b66 100644
--- a/drivers/net/ethernet/hisilicon/hns3/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/Makefile
@@ -1,7 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
#
# Makefile for the HISILICON network device drivers.
#
obj-$(CONFIG_HNS3) += hns3pf/
+obj-$(CONFIG_HNS3) += hns3vf/
obj-$(CONFIG_HNS3) += hnae3.o
+
+obj-$(CONFIG_HNS3_ENET) += hns3.o
+hns3-objs = hns3_enet.o hns3_ethtool.o
+
+hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
new file mode 100644
index 000000000000..3e9203ea42a6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2016-2017 Hisilicon Limited. */
+
+#ifndef __HCLGE_MBX_H
+#define __HCLGE_MBX_H
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#define HCLGE_MBX_VF_MSG_DATA_NUM 16
+
+enum HCLGE_MBX_OPCODE {
+ HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */
+ HCLGE_MBX_SET_UNICAST, /* (VF -> PF) set UC addr */
+ HCLGE_MBX_SET_MULTICAST, /* (VF -> PF) set MC addr */
+ HCLGE_MBX_SET_VLAN, /* (VF -> PF) set VLAN */
+ HCLGE_MBX_MAP_RING_TO_VECTOR, /* (VF -> PF) map ring-to-vector */
+ HCLGE_MBX_UNMAP_RING_TO_VECTOR, /* (VF -> PF) unamp ring-to-vector */
+ HCLGE_MBX_SET_PROMISC_MODE, /* (VF -> PF) set promiscuous mode */
+ HCLGE_MBX_SET_MACVLAN, /* (VF -> PF) set unicast filter */
+ HCLGE_MBX_API_NEGOTIATE, /* (VF -> PF) negotiate API version */
+ HCLGE_MBX_GET_QINFO, /* (VF -> PF) get queue config */
+ HCLGE_MBX_GET_TCINFO, /* (VF -> PF) get TC config */
+ HCLGE_MBX_GET_RETA, /* (VF -> PF) get RETA */
+ HCLGE_MBX_GET_RSS_KEY, /* (VF -> PF) get RSS key */
+ HCLGE_MBX_GET_MAC_ADDR, /* (VF -> PF) get MAC addr */
+ HCLGE_MBX_PF_VF_RESP, /* (PF -> VF) generate respone to VF */
+ HCLGE_MBX_GET_BDNUM, /* (VF -> PF) get BD num */
+ HCLGE_MBX_GET_BUFSIZE, /* (VF -> PF) get buffer size */
+ HCLGE_MBX_GET_STREAMID, /* (VF -> PF) get stream id */
+ HCLGE_MBX_SET_AESTART, /* (VF -> PF) start ae */
+ HCLGE_MBX_SET_TSOSTATS, /* (VF -> PF) get tso stats */
+ HCLGE_MBX_LINK_STAT_CHANGE, /* (PF -> VF) link status has changed */
+ HCLGE_MBX_GET_BASE_CONFIG, /* (VF -> PF) get config */
+ HCLGE_MBX_BIND_FUNC_QUEUE, /* (VF -> PF) bind function and queue */
+ HCLGE_MBX_GET_LINK_STATUS, /* (VF -> PF) get link status */
+ HCLGE_MBX_QUEUE_RESET, /* (VF -> PF) reset queue */
+};
+
+/* below are per-VF mac-vlan subcodes */
+enum hclge_mbx_mac_vlan_subcode {
+ HCLGE_MBX_MAC_VLAN_UC_MODIFY = 0, /* modify UC mac addr */
+ HCLGE_MBX_MAC_VLAN_UC_ADD, /* add a new UC mac addr */
+ HCLGE_MBX_MAC_VLAN_UC_REMOVE, /* remove a new UC mac addr */
+ HCLGE_MBX_MAC_VLAN_MC_MODIFY, /* modify MC mac addr */
+ HCLGE_MBX_MAC_VLAN_MC_ADD, /* add new MC mac addr */
+ HCLGE_MBX_MAC_VLAN_MC_REMOVE, /* remove MC mac addr */
+ HCLGE_MBX_MAC_VLAN_MC_FUNC_MTA_ENABLE, /* config func MTA enable */
+};
+
+/* below are per-VF vlan cfg subcodes */
+enum hclge_mbx_vlan_cfg_subcode {
+ HCLGE_MBX_VLAN_FILTER = 0, /* set vlan filter */
+ HCLGE_MBX_VLAN_TX_OFF_CFG, /* set tx side vlan offload */
+ HCLGE_MBX_VLAN_RX_OFF_CFG, /* set rx side vlan offload */
+};
+
+#define HCLGE_MBX_MAX_MSG_SIZE 16
+#define HCLGE_MBX_MAX_RESP_DATA_SIZE 8
+
+struct hclgevf_mbx_resp_status {
+ struct mutex mbx_mutex; /* protects against contending sync cmd resp */
+ u32 origin_mbx_msg;
+ bool received_resp;
+ int resp_status;
+ u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE];
+};
+
+struct hclge_mbx_vf_to_pf_cmd {
+ u8 rsv;
+ u8 mbx_src_vfid; /* Auto filled by IMP */
+ u8 rsv1[2];
+ u8 msg_len;
+ u8 rsv2[3];
+ u8 msg[HCLGE_MBX_MAX_MSG_SIZE];
+};
+
+struct hclge_mbx_pf_to_vf_cmd {
+ u8 dest_vfid;
+ u8 rsv[3];
+ u8 msg_len;
+ u8 rsv1[3];
+ u16 msg[8];
+};
+
+#define hclge_mbx_ring_ptr_move_crq(crq) \
+ (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index 5bcb2238acb2..02145f2de820 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -196,9 +196,18 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
const struct pci_device_id *id;
struct hnae3_ae_algo *ae_algo;
struct hnae3_client *client;
- int ret = 0;
+ int ret = 0, lock_acquired;
+
+ /* we can get deadlocked if SRIOV is being enabled in context to probe
+ * and probe gets called again in same context. This can happen when
+ * pci_enable_sriov() is called to create VFs from PF probes context.
+ * Therefore, for simplicity uniformly defering further probing in all
+ * cases where we detect contention.
+ */
+ lock_acquired = mutex_trylock(&hnae3_common_lock);
+ if (!lock_acquired)
+ return -EPROBE_DEFER;
- mutex_lock(&hnae3_common_lock);
list_add_tail(&ae_dev->node, &hnae3_ae_dev_list);
/* Check if there are matched ae_algo */
@@ -211,6 +220,7 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
if (!ae_dev->ops) {
dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n");
+ ret = -EOPNOTSUPP;
goto out_err;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 67c59e1039f2..a9e2b32834c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -452,9 +452,10 @@ struct hnae3_unic_private_info {
struct hnae3_queue **tqp; /* array base of all TQPs of this instance */
};
-#define HNAE3_SUPPORT_MAC_LOOPBACK 1
-#define HNAE3_SUPPORT_PHY_LOOPBACK 2
-#define HNAE3_SUPPORT_SERDES_LOOPBACK 4
+#define HNAE3_SUPPORT_MAC_LOOPBACK BIT(0)
+#define HNAE3_SUPPORT_PHY_LOOPBACK BIT(1)
+#define HNAE3_SUPPORT_SERDES_LOOPBACK BIT(2)
+#define HNAE3_SUPPORT_VF BIT(3)
struct hnae3_handle {
struct hnae3_client *client;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
index 925619a7c50a..eb82700da7d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
@@ -93,7 +93,7 @@ void hns3_dcbnl_setup(struct hnae3_handle *handle)
{
struct net_device *dev = handle->kinfo.netdev;
- if (!handle->kinfo.dcb_ops)
+ if ((!handle->kinfo.dcb_ops) || (handle->flags & HNAE3_SUPPORT_VF))
return;
dev->dcbnl_ops = &hns3_dcbnl_ops;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 59415090ff0f..c2c13238db52 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -52,6 +52,8 @@ static const struct pci_device_id hns3_pci_tbl[] = {
HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0},
/* required last entry */
{0, }
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 8a9de759957b..8a9de759957b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index a21470c72da3..65a69b439457 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -849,6 +849,21 @@ static int hns3_nway_reset(struct net_device *netdev)
return genphy_restart_aneg(phy);
}
+static const struct ethtool_ops hns3vf_ethtool_ops = {
+ .get_drvinfo = hns3_get_drvinfo,
+ .get_ringparam = hns3_get_ringparam,
+ .set_ringparam = hns3_set_ringparam,
+ .get_strings = hns3_get_strings,
+ .get_ethtool_stats = hns3_get_stats,
+ .get_sset_count = hns3_get_sset_count,
+ .get_rxnfc = hns3_get_rxnfc,
+ .get_rxfh_key_size = hns3_get_rss_key_size,
+ .get_rxfh_indir_size = hns3_get_rss_indir_size,
+ .get_rxfh = hns3_get_rss,
+ .set_rxfh = hns3_set_rss,
+ .get_link_ksettings = hns3_get_link_ksettings,
+};
+
static const struct ethtool_ops hns3_ethtool_ops = {
.self_test = hns3_self_test,
.get_drvinfo = hns3_get_drvinfo,
@@ -872,5 +887,10 @@ static const struct ethtool_ops hns3_ethtool_ops = {
void hns3_ethtool_set_ops(struct net_device *netdev)
{
- netdev->ethtool_ops = &hns3_ethtool_ops;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+
+ if (h->flags & HNAE3_SUPPORT_VF)
+ netdev->ethtool_ops = &hns3vf_ethtool_ops;
+ else
+ netdev->ethtool_ops = &hns3_ethtool_ops;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index d2b20d01a58c..cb8ddd043476 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0+
#
# Makefile for the HISILICON network device drivers.
#
@@ -5,11 +6,6 @@
ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_HNS3_HCLGE) += hclge.o
-hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o
hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
-
-obj-$(CONFIG_HNS3_ENET) += hns3.o
-hns3-objs = hns3_enet.o hns3_ethtool.o
-
-hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 59ed806a52c3..e97fd6654e5e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -17,10 +17,11 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
-
+#include <net/rtnetlink.h>
#include "hclge_cmd.h"
#include "hclge_dcb.h"
#include "hclge_main.h"
+#include "hclge_mbx.h"
#include "hclge_mdio.h"
#include "hclge_tm.h"
#include "hnae3.h"
@@ -2226,6 +2227,18 @@ static int hclge_mac_init(struct hclge_dev *hdev)
return hclge_cfg_func_mta_filter(hdev, 0, hdev->accept_mta_mc);
}
+static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
+{
+ if (!test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ schedule_work(&hdev->mbx_service_task);
+}
+
+static void hclge_reset_task_schedule(struct hclge_dev *hdev)
+{
+ if (!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
+ schedule_work(&hdev->rst_service_task);
+}
+
static void hclge_task_schedule(struct hclge_dev *hdev)
{
if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
@@ -2362,6 +2375,64 @@ static void hclge_service_complete(struct hclge_dev *hdev)
clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
}
+static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
+{
+ u32 rst_src_reg;
+ u32 cmdq_src_reg;
+
+ /* fetch the events from their corresponding regs */
+ rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG);
+ cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
+
+ /* Assumption: If by any chance reset and mailbox events are reported
+ * together then we will only process reset event in this go and will
+ * defer the processing of the mailbox events. Since, we would have not
+ * cleared RX CMDQ event this time we would receive again another
+ * interrupt from H/W just for the mailbox.
+ */
+
+ /* check for vector0 reset event sources */
+ if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_src_reg) {
+ set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
+ *clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
+ return HCLGE_VECTOR0_EVENT_RST;
+ }
+
+ if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) {
+ set_bit(HNAE3_CORE_RESET, &hdev->reset_pending);
+ *clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
+ return HCLGE_VECTOR0_EVENT_RST;
+ }
+
+ if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_src_reg) {
+ set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
+ *clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
+ return HCLGE_VECTOR0_EVENT_RST;
+ }
+
+ /* check for vector0 mailbox(=CMDQ RX) event source */
+ if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
+ cmdq_src_reg &= ~BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B);
+ *clearval = cmdq_src_reg;
+ return HCLGE_VECTOR0_EVENT_MBX;
+ }
+
+ return HCLGE_VECTOR0_EVENT_OTHER;
+}
+
+static void hclge_clear_event_cause(struct hclge_dev *hdev, u32 event_type,
+ u32 regclr)
+{
+ switch (event_type) {
+ case HCLGE_VECTOR0_EVENT_RST:
+ hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, regclr);
+ break;
+ case HCLGE_VECTOR0_EVENT_MBX:
+ hclge_write_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG, regclr);
+ break;
+ }
+}
+
static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable)
{
writel(enable ? 1 : 0, vector->addr);
@@ -2370,10 +2441,38 @@ static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable)
static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
{
struct hclge_dev *hdev = data;
+ u32 event_cause;
+ u32 clearval;
hclge_enable_vector(&hdev->misc_vector, false);
- if (!test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
- schedule_work(&hdev->service_task);
+ event_cause = hclge_check_event_cause(hdev, &clearval);
+
+ /* vector 0 interrupt is shared with reset and mailbox source events.*/
+ switch (event_cause) {
+ case HCLGE_VECTOR0_EVENT_RST:
+ hclge_reset_task_schedule(hdev);
+ break;
+ case HCLGE_VECTOR0_EVENT_MBX:
+ /* If we are here then,
+ * 1. Either we are not handling any mbx task and we are not
+ * scheduled as well
+ * OR
+ * 2. We could be handling a mbx task but nothing more is
+ * scheduled.
+ * In both cases, we should schedule mbx task as there are more
+ * mbx messages reported by this interrupt.
+ */
+ hclge_mbx_task_schedule(hdev);
+
+ default:
+ dev_dbg(&hdev->pdev->dev,
+ "received unknown or unhandled event of vector0\n");
+ break;
+ }
+
+ /* we should clear the source of interrupt */
+ hclge_clear_event_cause(hdev, event_cause, clearval);
+ hclge_enable_vector(&hdev->misc_vector, true);
return IRQ_HANDLED;
}
@@ -2404,9 +2503,9 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev)
hclge_get_misc_vector(hdev);
- ret = devm_request_irq(&hdev->pdev->dev,
- hdev->misc_vector.vector_irq,
- hclge_misc_irq_handle, 0, "hclge_misc", hdev);
+ /* this would be explicitly freed in the end */
+ ret = request_irq(hdev->misc_vector.vector_irq, hclge_misc_irq_handle,
+ 0, "hclge_misc", hdev);
if (ret) {
hclge_free_vector(hdev, 0);
dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n",
@@ -2416,6 +2515,12 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev)
return ret;
}
+static void hclge_misc_irq_uninit(struct hclge_dev *hdev)
+{
+ free_irq(hdev->misc_vector.vector_irq, hdev);
+ hclge_free_vector(hdev, 0);
+}
+
static int hclge_notify_client(struct hclge_dev *hdev,
enum hnae3_reset_notify_type type)
{
@@ -2471,12 +2576,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
cnt++;
}
- /* must clear reset status register to
- * prevent driver detect reset interrupt again
- */
- reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG);
- hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, reg);
-
if (cnt >= HCLGE_RESET_WAIT_CNT) {
dev_warn(&hdev->pdev->dev,
"Wait for reset timeout: %d\n", hdev->reset_type);
@@ -2505,12 +2604,12 @@ static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
return ret;
}
-static void hclge_do_reset(struct hclge_dev *hdev, enum hnae3_reset_type type)
+static void hclge_do_reset(struct hclge_dev *hdev)
{
struct pci_dev *pdev = hdev->pdev;
u32 val;
- switch (type) {
+ switch (hdev->reset_type) {
case HNAE3_GLOBAL_RESET:
val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
hnae_set_bit(val, HCLGE_GLOBAL_RESET_BIT, 1);
@@ -2526,30 +2625,62 @@ static void hclge_do_reset(struct hclge_dev *hdev, enum hnae3_reset_type type)
case HNAE3_FUNC_RESET:
dev_info(&pdev->dev, "PF Reset requested\n");
hclge_func_reset_cmd(hdev, 0);
+ /* schedule again to check later */
+ set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending);
+ hclge_reset_task_schedule(hdev);
break;
default:
dev_warn(&pdev->dev,
- "Unsupported reset type: %d\n", type);
+ "Unsupported reset type: %d\n", hdev->reset_type);
break;
}
}
-static enum hnae3_reset_type hclge_detected_reset_event(struct hclge_dev *hdev)
+static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev,
+ unsigned long *addr)
{
enum hnae3_reset_type rst_level = HNAE3_NONE_RESET;
- u32 rst_reg_val;
- rst_reg_val = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG);
- if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_reg_val)
+ /* return the highest priority reset level amongst all */
+ if (test_bit(HNAE3_GLOBAL_RESET, addr))
rst_level = HNAE3_GLOBAL_RESET;
- else if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_reg_val)
+ else if (test_bit(HNAE3_CORE_RESET, addr))
rst_level = HNAE3_CORE_RESET;
- else if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_reg_val)
+ else if (test_bit(HNAE3_IMP_RESET, addr))
rst_level = HNAE3_IMP_RESET;
+ else if (test_bit(HNAE3_FUNC_RESET, addr))
+ rst_level = HNAE3_FUNC_RESET;
+
+ /* now, clear all other resets */
+ clear_bit(HNAE3_GLOBAL_RESET, addr);
+ clear_bit(HNAE3_CORE_RESET, addr);
+ clear_bit(HNAE3_IMP_RESET, addr);
+ clear_bit(HNAE3_FUNC_RESET, addr);
return rst_level;
}
+static void hclge_reset(struct hclge_dev *hdev)
+{
+ /* perform reset of the stack & ae device for a client */
+
+ hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+ if (!hclge_reset_wait(hdev)) {
+ rtnl_lock();
+ hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+ hclge_reset_ae_dev(hdev->ae_dev);
+ hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+ rtnl_unlock();
+ } else {
+ /* schedule again to check pending resets later */
+ set_bit(hdev->reset_type, &hdev->reset_pending);
+ hclge_reset_task_schedule(hdev);
+ }
+
+ hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+}
+
static void hclge_reset_event(struct hnae3_handle *handle,
enum hnae3_reset_type reset)
{
@@ -2563,14 +2694,9 @@ static void hclge_reset_event(struct hnae3_handle *handle,
case HNAE3_FUNC_RESET:
case HNAE3_CORE_RESET:
case HNAE3_GLOBAL_RESET:
- if (test_bit(HCLGE_STATE_RESET_INT, &hdev->state)) {
- dev_err(&hdev->pdev->dev, "Already in reset state");
- return;
- }
- hdev->reset_type = reset;
- set_bit(HCLGE_STATE_RESET_INT, &hdev->state);
- set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
- schedule_work(&hdev->service_task);
+ /* request reset & schedule reset task */
+ set_bit(reset, &hdev->reset_request);
+ hclge_reset_task_schedule(hdev);
break;
default:
dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
@@ -2580,49 +2706,55 @@ static void hclge_reset_event(struct hnae3_handle *handle,
static void hclge_reset_subtask(struct hclge_dev *hdev)
{
- bool do_reset;
+ /* check if there is any ongoing reset in the hardware. This status can
+ * be checked from reset_pending. If there is then, we need to wait for
+ * hardware to complete reset.
+ * a. If we are able to figure out in reasonable time that hardware
+ * has fully resetted then, we can proceed with driver, client
+ * reset.
+ * b. else, we can come back later to check this status so re-sched
+ * now.
+ */
+ hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_pending);
+ if (hdev->reset_type != HNAE3_NONE_RESET)
+ hclge_reset(hdev);
- do_reset = hdev->reset_type != HNAE3_NONE_RESET;
+ /* check if we got any *new* reset requests to be honored */
+ hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_request);
+ if (hdev->reset_type != HNAE3_NONE_RESET)
+ hclge_do_reset(hdev);
- /* Reset is detected by interrupt */
- if (hdev->reset_type == HNAE3_NONE_RESET)
- hdev->reset_type = hclge_detected_reset_event(hdev);
+ hdev->reset_type = HNAE3_NONE_RESET;
+}
- if (hdev->reset_type == HNAE3_NONE_RESET)
+static void hclge_reset_service_task(struct work_struct *work)
+{
+ struct hclge_dev *hdev =
+ container_of(work, struct hclge_dev, rst_service_task);
+
+ if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
return;
- switch (hdev->reset_type) {
- case HNAE3_FUNC_RESET:
- case HNAE3_CORE_RESET:
- case HNAE3_GLOBAL_RESET:
- case HNAE3_IMP_RESET:
- hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state);
- if (do_reset)
- hclge_do_reset(hdev, hdev->reset_type);
- else
- set_bit(HCLGE_STATE_RESET_INT, &hdev->state);
+ hclge_reset_subtask(hdev);
- if (!hclge_reset_wait(hdev)) {
- hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
- hclge_reset_ae_dev(hdev->ae_dev);
- hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
- clear_bit(HCLGE_STATE_RESET_INT, &hdev->state);
- }
- hclge_notify_client(hdev, HNAE3_UP_CLIENT);
- break;
- default:
- dev_err(&hdev->pdev->dev, "Unsupported reset type:%d\n",
- hdev->reset_type);
- break;
- }
- hdev->reset_type = HNAE3_NONE_RESET;
+ clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
}
-static void hclge_misc_irq_service_task(struct hclge_dev *hdev)
+static void hclge_mailbox_service_task(struct work_struct *work)
{
- hclge_reset_subtask(hdev);
- hclge_enable_vector(&hdev->misc_vector, true);
+ struct hclge_dev *hdev =
+ container_of(work, struct hclge_dev, mbx_service_task);
+
+ if (test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state))
+ return;
+
+ clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state);
+
+ hclge_mbx_handler(hdev);
+
+ clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
}
static void hclge_service_task(struct work_struct *work)
@@ -2630,7 +2762,6 @@ static void hclge_service_task(struct work_struct *work)
struct hclge_dev *hdev =
container_of(work, struct hclge_dev, service_task);
- hclge_misc_irq_service_task(hdev);
hclge_update_speed_duplex(hdev);
hclge_update_link_status(hdev);
hclge_update_stats_for_all(hdev);
@@ -3174,49 +3305,48 @@ err:
return ret;
}
-int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
- struct hnae3_ring_chain_node *ring_chain)
+int hclge_bind_ring_with_vector(struct hclge_vport *vport,
+ int vector_id, bool en,
+ struct hnae3_ring_chain_node *ring_chain)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_ctrl_vector_chain_cmd *req;
struct hnae3_ring_chain_node *node;
struct hclge_desc desc;
- int ret;
+ struct hclge_ctrl_vector_chain_cmd *req
+ = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
+ enum hclge_cmd_status status;
+ enum hclge_opcode_type op;
+ u16 tqp_type_and_id;
int i;
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false);
-
- req = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
+ op = en ? HCLGE_OPC_ADD_RING_TO_VECTOR : HCLGE_OPC_DEL_RING_TO_VECTOR;
+ hclge_cmd_setup_basic_desc(&desc, op, false);
req->int_vector_id = vector_id;
i = 0;
for (node = ring_chain; node; node = node->next) {
- u16 type_and_id = 0;
-
- hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S,
- hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S,
- node->tqp_index);
- hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M,
- HCLGE_INT_GL_IDX_S,
+ tqp_type_and_id = le16_to_cpu(req->tqp_type_and_id[i]);
+ hnae_set_field(tqp_type_and_id, HCLGE_INT_TYPE_M,
+ HCLGE_INT_TYPE_S,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- req->tqp_type_and_id[i] = cpu_to_le16(type_and_id);
- req->vfid = vport->vport_id;
-
+ hnae_set_field(tqp_type_and_id, HCLGE_TQP_ID_M,
+ HCLGE_TQP_ID_S, node->tqp_index);
+ req->tqp_type_and_id[i] = cpu_to_le16(tqp_type_and_id);
if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
+ req->vfid = vport->vport_id;
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
+ status = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
dev_err(&hdev->pdev->dev,
"Map TQP fail, status is %d.\n",
- ret);
- return ret;
+ status);
+ return -EIO;
}
i = 0;
hclge_cmd_setup_basic_desc(&desc,
- HCLGE_OPC_ADD_RING_TO_VECTOR,
+ op,
false);
req->int_vector_id = vector_id;
}
@@ -3224,21 +3354,21 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
if (i > 0) {
req->int_cause_num = i;
-
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
+ req->vfid = vport->vport_id;
+ status = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
dev_err(&hdev->pdev->dev,
- "Map TQP fail, status is %d.\n", ret);
- return ret;
+ "Map TQP fail, status is %d.\n", status);
+ return -EIO;
}
}
return 0;
}
-static int hclge_map_handle_ring_to_vector(
- struct hnae3_handle *handle, int vector,
- struct hnae3_ring_chain_node *ring_chain)
+static int hclge_map_ring_to_vector(struct hnae3_handle *handle,
+ int vector,
+ struct hnae3_ring_chain_node *ring_chain)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
@@ -3247,24 +3377,20 @@ static int hclge_map_handle_ring_to_vector(
vector_id = hclge_get_vector_index(hdev, vector);
if (vector_id < 0) {
dev_err(&hdev->pdev->dev,
- "Get vector index fail. ret =%d\n", vector_id);
+ "Get vector index fail. vector_id =%d\n", vector_id);
return vector_id;
}
- return hclge_map_vport_ring_to_vector(vport, vector_id, ring_chain);
+ return hclge_bind_ring_with_vector(vport, vector_id, true, ring_chain);
}
-static int hclge_unmap_ring_from_vector(
- struct hnae3_handle *handle, int vector,
- struct hnae3_ring_chain_node *ring_chain)
+static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
+ int vector,
+ struct hnae3_ring_chain_node *ring_chain)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- struct hclge_ctrl_vector_chain_cmd *req;
- struct hnae3_ring_chain_node *node;
- struct hclge_desc desc;
- int i, vector_id;
- int ret;
+ int vector_id, ret;
vector_id = hclge_get_vector_index(hdev, vector);
if (vector_id < 0) {
@@ -3273,54 +3399,17 @@ static int hclge_unmap_ring_from_vector(
return vector_id;
}
- hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false);
-
- req = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
- req->int_vector_id = vector_id;
-
- i = 0;
- for (node = ring_chain; node; node = node->next) {
- u16 type_and_id = 0;
-
- hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S,
- hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S,
- node->tqp_index);
- hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M,
- HCLGE_INT_GL_IDX_S,
- hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
-
- req->tqp_type_and_id[i] = cpu_to_le16(type_and_id);
- req->vfid = vport->vport_id;
-
- if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
- req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
-
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Unmap TQP fail, status is %d.\n",
- ret);
- return ret;
- }
- i = 0;
- hclge_cmd_setup_basic_desc(&desc,
- HCLGE_OPC_DEL_RING_TO_VECTOR,
- false);
- req->int_vector_id = vector_id;
- }
+ ret = hclge_bind_ring_with_vector(vport, vector_id, false, ring_chain);
+ if (ret) {
+ dev_err(&handle->pdev->dev,
+ "Unmap ring from vector fail. vectorid=%d, ret =%d\n",
+ vector_id,
+ ret);
+ return ret;
}
- if (i > 0) {
- req->int_cause_num = i;
-
- ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret) {
- dev_err(&hdev->pdev->dev,
- "Unmap TQP fail, status is %d.\n", ret);
- return ret;
- }
- }
+ /* Free this MSIX or MSI vector */
+ hclge_free_vector(hdev, vector_id);
return 0;
}
@@ -4341,7 +4430,7 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
}
-static void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
@@ -4661,6 +4750,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hdev->pdev = pdev;
hdev->ae_dev = ae_dev;
hdev->reset_type = HNAE3_NONE_RESET;
+ hdev->reset_request = 0;
+ hdev->reset_pending = 0;
ae_dev->priv = hdev;
ret = hclge_pci_init(hdev);
@@ -4772,12 +4863,18 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
timer_setup(&hdev->service_timer, hclge_service_timer, 0);
INIT_WORK(&hdev->service_task, hclge_service_task);
+ INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
+ INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);
/* Enable MISC vector(vector0) */
hclge_enable_vector(&hdev->misc_vector, true);
set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
set_bit(HCLGE_STATE_DOWN, &hdev->state);
+ clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state);
+ clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+ clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state);
+ clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
return 0;
@@ -4889,14 +4986,18 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
del_timer_sync(&hdev->service_timer);
if (hdev->service_task.func)
cancel_work_sync(&hdev->service_task);
+ if (hdev->rst_service_task.func)
+ cancel_work_sync(&hdev->rst_service_task);
+ if (hdev->mbx_service_task.func)
+ cancel_work_sync(&hdev->mbx_service_task);
if (mac->phydev)
mdiobus_unregister(mac->mdio_bus);
/* Disable MISC vector(vector0) */
hclge_enable_vector(&hdev->misc_vector, false);
- hclge_free_vector(hdev, 0);
hclge_destroy_cmd_queue(&hdev->hw);
+ hclge_misc_irq_uninit(hdev);
hclge_pci_uninit(hdev);
ae_dev->priv = NULL;
}
@@ -4906,8 +5007,8 @@ static const struct hnae3_ae_ops hclge_ops = {
.uninit_ae_dev = hclge_uninit_ae_dev,
.init_client_instance = hclge_init_client_instance,
.uninit_client_instance = hclge_uninit_client_instance,
- .map_ring_to_vector = hclge_map_handle_ring_to_vector,
- .unmap_ring_from_vector = hclge_unmap_ring_from_vector,
+ .map_ring_to_vector = hclge_map_ring_to_vector,
+ .unmap_ring_from_vector = hclge_unmap_ring_frm_vector,
.get_vector = hclge_get_vector,
.set_promisc_mode = hclge_set_promisc_mode,
.set_loopback = hclge_set_loopback,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7027814ea5d7..fb043b54583d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -92,6 +92,11 @@
#define HCLGE_VECTOR0_CORERESET_INT_B 6
#define HCLGE_VECTOR0_IMPRESET_INT_B 7
+/* Vector0 interrupt CMDQ event source register(RW) */
+#define HCLGE_VECTOR0_CMDQ_SRC_REG 0x27100
+/* CMDQ register bits for RX event(=MBX event) */
+#define HCLGE_VECTOR0_RX_CMDQ_INT_B 1
+
enum HCLGE_DEV_STATE {
HCLGE_STATE_REINITING,
HCLGE_STATE_DOWN,
@@ -99,12 +104,19 @@ enum HCLGE_DEV_STATE {
HCLGE_STATE_REMOVING,
HCLGE_STATE_SERVICE_INITED,
HCLGE_STATE_SERVICE_SCHED,
+ HCLGE_STATE_RST_SERVICE_SCHED,
+ HCLGE_STATE_RST_HANDLING,
+ HCLGE_STATE_MBX_SERVICE_SCHED,
HCLGE_STATE_MBX_HANDLING,
- HCLGE_STATE_MBX_IRQ,
- HCLGE_STATE_RESET_INT,
HCLGE_STATE_MAX
};
+enum hclge_evt_cause {
+ HCLGE_VECTOR0_EVENT_RST,
+ HCLGE_VECTOR0_EVENT_MBX,
+ HCLGE_VECTOR0_EVENT_OTHER,
+};
+
#define HCLGE_MPF_ENBALE 1
struct hclge_caps {
u16 num_tqp;
@@ -420,6 +432,8 @@ struct hclge_dev {
unsigned long state;
enum hnae3_reset_type reset_type;
+ unsigned long reset_request; /* reset has been requested */
+ unsigned long reset_pending; /* client rst is pending to be served */
u32 fw_version;
u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */
u16 num_tqps; /* Num task queue pairs of this PF */
@@ -469,6 +483,8 @@ struct hclge_dev {
unsigned long service_timer_previous;
struct timer_list service_timer;
struct work_struct service_task;
+ struct work_struct rst_service_task;
+ struct work_struct mbx_service_task;
bool cur_promisc;
int num_alloc_vfs; /* Actual number of VFs allocated */
@@ -529,8 +545,10 @@ int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
u8 func_id,
bool enable);
struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle);
-int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector,
- struct hnae3_ring_chain_node *ring_chain);
+int hclge_bind_ring_with_vector(struct hclge_vport *vport,
+ int vector_id, bool en,
+ struct hnae3_ring_chain_node *ring_chain);
+
static inline int hclge_get_queue_id(struct hnae3_queue *queue)
{
struct hclge_tqp *tqp = container_of(queue, struct hclge_tqp, q);
@@ -544,4 +562,7 @@ int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
int hclge_buffer_alloc(struct hclge_dev *hdev);
int hclge_rss_init_hw(struct hclge_dev *hdev);
+
+void hclge_mbx_handler(struct hclge_dev *hdev);
+void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
new file mode 100644
index 000000000000..96f453ff84b5
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
+
+#include "hclge_main.h"
+#include "hclge_mbx.h"
+#include "hnae3.h"
+
+/* hclge_gen_resp_to_vf: used to generate a synchronous response to VF when PF
+ * receives a mailbox message from VF.
+ * @vport: pointer to struct hclge_vport
+ * @vf_to_pf_req: pointer to hclge_mbx_vf_to_pf_cmd of the original mailbox
+ * message
+ * @resp_status: indicate to VF whether its request success(0) or failed.
+ */
+static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *vf_to_pf_req,
+ int resp_status,
+ u8 *resp_data, u16 resp_data_len)
+{
+ struct hclge_mbx_pf_to_vf_cmd *resp_pf_to_vf;
+ struct hclge_dev *hdev = vport->back;
+ enum hclge_cmd_status status;
+ struct hclge_desc desc;
+
+ resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data;
+
+ if (resp_data_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) {
+ dev_err(&hdev->pdev->dev,
+ "PF fail to gen resp to VF len %d exceeds max len %d\n",
+ resp_data_len,
+ HCLGE_MBX_MAX_RESP_DATA_SIZE);
+ }
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
+
+ resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid;
+ resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len;
+
+ resp_pf_to_vf->msg[0] = HCLGE_MBX_PF_VF_RESP;
+ resp_pf_to_vf->msg[1] = vf_to_pf_req->msg[0];
+ resp_pf_to_vf->msg[2] = vf_to_pf_req->msg[1];
+ resp_pf_to_vf->msg[3] = (resp_status == 0) ? 0 : 1;
+
+ if (resp_data && resp_data_len > 0)
+ memcpy(&resp_pf_to_vf->msg[4], resp_data, resp_data_len);
+
+ status = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "PF failed(=%d) to send response to VF\n", status);
+
+ return status;
+}
+
+static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
+ u16 mbx_opcode, u8 dest_vfid)
+{
+ struct hclge_mbx_pf_to_vf_cmd *resp_pf_to_vf;
+ struct hclge_dev *hdev = vport->back;
+ enum hclge_cmd_status status;
+ struct hclge_desc desc;
+
+ resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
+
+ resp_pf_to_vf->dest_vfid = dest_vfid;
+ resp_pf_to_vf->msg_len = msg_len;
+ resp_pf_to_vf->msg[0] = mbx_opcode;
+
+ memcpy(&resp_pf_to_vf->msg[1], msg, msg_len);
+
+ status = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "PF failed(=%d) to send mailbox message to VF\n",
+ status);
+
+ return status;
+}
+
+static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
+{
+ struct hnae3_ring_chain_node *chain_tmp, *chain;
+
+ chain = head->next;
+
+ while (chain) {
+ chain_tmp = chain->next;
+ kzfree(chain);
+ chain = chain_tmp;
+ }
+}
+
+/* hclge_get_ring_chain_from_mbx: get ring type & tqpid from mailbox message
+ * msg[0]: opcode
+ * msg[1]: <not relevant to this function>
+ * msg[2]: ring_num
+ * msg[3]: first ring type (TX|RX)
+ * msg[4]: first tqp id
+ * msg[5] ~ msg[14]: other ring type and tqp id
+ */
+static int hclge_get_ring_chain_from_mbx(
+ struct hclge_mbx_vf_to_pf_cmd *req,
+ struct hnae3_ring_chain_node *ring_chain,
+ struct hclge_vport *vport)
+{
+#define HCLGE_RING_NODE_VARIABLE_NUM 3
+#define HCLGE_RING_MAP_MBX_BASIC_MSG_NUM 3
+ struct hnae3_ring_chain_node *cur_chain, *new_chain;
+ int ring_num;
+ int i;
+
+ ring_num = req->msg[2];
+
+ hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
+ ring_chain->tqp_index =
+ hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
+
+ cur_chain = ring_chain;
+
+ for (i = 1; i < ring_num; i++) {
+ new_chain = kzalloc(sizeof(*new_chain), GFP_KERNEL);
+ if (!new_chain)
+ goto err;
+
+ hnae_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
+ req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_RING_MAP_MBX_BASIC_MSG_NUM]);
+
+ new_chain->tqp_index =
+ hclge_get_queue_id(vport->nic.kinfo.tqp
+ [req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+
+ cur_chain->next = new_chain;
+ cur_chain = new_chain;
+ }
+
+ return 0;
+err:
+ hclge_free_vector_ring_chain(ring_chain);
+ return -ENOMEM;
+}
+
+static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en,
+ struct hclge_mbx_vf_to_pf_cmd *req)
+{
+ struct hnae3_ring_chain_node ring_chain;
+ int vector_id = req->msg[1];
+ int ret;
+
+ memset(&ring_chain, 0, sizeof(ring_chain));
+ ret = hclge_get_ring_chain_from_mbx(req, &ring_chain, vport);
+ if (ret)
+ return ret;
+
+ ret = hclge_bind_ring_with_vector(vport, vector_id, en, &ring_chain);
+ if (ret)
+ return ret;
+
+ hclge_free_vector_ring_chain(&ring_chain);
+
+ return 0;
+}
+
+static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *req)
+{
+ bool en = req->msg[1] ? true : false;
+ struct hclge_promisc_param param;
+
+ /* always enable broadcast promisc bit */
+ hclge_promisc_param_init(&param, en, en, true, vport->vport_id);
+ return hclge_cmd_set_promisc_mode(vport->back, &param);
+}
+
+static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ bool gen_resp)
+{
+ const u8 *mac_addr = (const u8 *)(&mbx_req->msg[2]);
+ struct hclge_dev *hdev = vport->back;
+ int status;
+
+ if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_MODIFY) {
+ const u8 *old_addr = (const u8 *)(&mbx_req->msg[8]);
+
+ hclge_rm_uc_addr_common(vport, old_addr);
+ status = hclge_add_uc_addr_common(vport, mac_addr);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) {
+ status = hclge_add_uc_addr_common(vport, mac_addr);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
+ status = hclge_rm_uc_addr_common(vport, mac_addr);
+ } else {
+ dev_err(&hdev->pdev->dev,
+ "failed to set unicast mac addr, unknown subcode %d\n",
+ mbx_req->msg[1]);
+ return -EIO;
+ }
+
+ if (gen_resp)
+ hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
+
+ return 0;
+}
+
+static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ bool gen_resp)
+{
+ const u8 *mac_addr = (const u8 *)(&mbx_req->msg[2]);
+ struct hclge_dev *hdev = vport->back;
+ int status;
+
+ if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_ADD) {
+ status = hclge_add_mc_addr_common(vport, mac_addr);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_REMOVE) {
+ status = hclge_rm_mc_addr_common(vport, mac_addr);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_FUNC_MTA_ENABLE) {
+ u8 func_id = vport->vport_id;
+ bool enable = mbx_req->msg[2];
+
+ status = hclge_cfg_func_mta_filter(hdev, func_id, enable);
+ } else {
+ dev_err(&hdev->pdev->dev,
+ "failed to set mcast mac addr, unknown subcode %d\n",
+ mbx_req->msg[1]);
+ return -EIO;
+ }
+
+ if (gen_resp)
+ hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
+
+ return 0;
+}
+
+static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ bool gen_resp)
+{
+ struct hclge_dev *hdev = vport->back;
+ int status = 0;
+
+ if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) {
+ u16 vlan, proto;
+ bool is_kill;
+
+ is_kill = !!mbx_req->msg[2];
+ memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan));
+ memcpy(&proto, &mbx_req->msg[5], sizeof(proto));
+ status = hclge_set_vf_vlan_common(hdev, vport->vport_id,
+ is_kill, vlan, 0,
+ cpu_to_be16(proto));
+ }
+
+ if (gen_resp)
+ status = hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
+
+ return status;
+}
+
+static int hclge_get_vf_tcinfo(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ bool gen_resp)
+{
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &hdev->hw_tc_map,
+ sizeof(u8));
+
+ return ret;
+}
+
+static int hclge_get_vf_queue_info(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ bool gen_resp)
+{
+#define HCLGE_TQPS_RSS_INFO_LEN 8
+ u8 resp_data[HCLGE_TQPS_RSS_INFO_LEN];
+ struct hclge_dev *hdev = vport->back;
+
+ /* get the queue related info */
+ memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16));
+ memcpy(&resp_data[2], &hdev->rss_size_max, sizeof(u16));
+ memcpy(&resp_data[4], &hdev->num_desc, sizeof(u16));
+ memcpy(&resp_data[6], &hdev->rx_buf_len, sizeof(u16));
+
+ return hclge_gen_resp_to_vf(vport, mbx_req, 0, resp_data,
+ HCLGE_TQPS_RSS_INFO_LEN);
+}
+
+static int hclge_get_link_info(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+ struct hclge_dev *hdev = vport->back;
+ u16 link_status;
+ u8 msg_data[2];
+ u8 dest_vfid;
+
+ /* mac.link can only be 0 or 1 */
+ link_status = (u16)hdev->hw.mac.link;
+ memcpy(&msg_data[0], &link_status, sizeof(u16));
+ dest_vfid = mbx_req->mbx_src_vfid;
+
+ /* send this requested info to VF */
+ return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+ HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
+}
+
+static void hclge_reset_vf_queue(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+ u16 queue_id;
+
+ memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
+
+ hclge_reset_tqp(&vport->nic, queue_id);
+}
+
+void hclge_mbx_handler(struct hclge_dev *hdev)
+{
+ struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
+ struct hclge_mbx_vf_to_pf_cmd *req;
+ struct hclge_vport *vport;
+ struct hclge_desc *desc;
+ int ret;
+
+ /* handle all the mailbox requests in the queue */
+ while (hnae_get_bit(crq->desc[crq->next_to_use].flag,
+ HCLGE_CMDQ_RX_OUTVLD_B)) {
+ desc = &crq->desc[crq->next_to_use];
+ req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
+
+ vport = &hdev->vport[req->mbx_src_vfid];
+
+ switch (req->msg[0]) {
+ case HCLGE_MBX_MAP_RING_TO_VECTOR:
+ ret = hclge_map_unmap_ring_to_vf_vector(vport, true,
+ req);
+ break;
+ case HCLGE_MBX_UNMAP_RING_TO_VECTOR:
+ ret = hclge_map_unmap_ring_to_vf_vector(vport, false,
+ req);
+ break;
+ case HCLGE_MBX_SET_PROMISC_MODE:
+ ret = hclge_set_vf_promisc_mode(vport, req);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to set VF promisc mode\n",
+ ret);
+ break;
+ case HCLGE_MBX_SET_UNICAST:
+ ret = hclge_set_vf_uc_mac_addr(vport, req, false);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to set VF UC MAC Addr\n",
+ ret);
+ break;
+ case HCLGE_MBX_SET_MULTICAST:
+ ret = hclge_set_vf_mc_mac_addr(vport, req, false);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to set VF MC MAC Addr\n",
+ ret);
+ break;
+ case HCLGE_MBX_SET_VLAN:
+ ret = hclge_set_vf_vlan_cfg(vport, req, false);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF failed(%d) to config VF's VLAN\n",
+ ret);
+ break;
+ case HCLGE_MBX_GET_QINFO:
+ ret = hclge_get_vf_queue_info(vport, req, true);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF failed(%d) to get Q info for VF\n",
+ ret);
+ break;
+ case HCLGE_MBX_GET_TCINFO:
+ ret = hclge_get_vf_tcinfo(vport, req, true);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF failed(%d) to get TC info for VF\n",
+ ret);
+ break;
+ case HCLGE_MBX_GET_LINK_STATUS:
+ ret = hclge_get_link_info(vport, req);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to get link stat for VF\n",
+ ret);
+ break;
+ case HCLGE_MBX_QUEUE_RESET:
+ hclge_reset_vf_queue(vport, req);
+ break;
+ default:
+ dev_err(&hdev->pdev->dev,
+ "un-supported mailbox message, code = %d\n",
+ req->msg[0]);
+ break;
+ }
+ hclge_mbx_ring_ptr_move_crq(crq);
+ }
+
+ /* Write back CMDQ_RQ header pointer, M7 need this pointer */
+ hclge_write_dev(&hdev->hw, HCLGE_NIC_CRQ_HEAD_REG, crq->next_to_use);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
new file mode 100644
index 000000000000..fb93bbd35845
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Makefile for the HISILICON network device drivers.
+#
+
+ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+
+obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o
+hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o \ No newline at end of file
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
new file mode 100644
index 000000000000..85985e731311
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
+
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "hclgevf_cmd.h"
+#include "hclgevf_main.h"
+#include "hnae3.h"
+
+#define hclgevf_is_csq(ring) ((ring)->flag & HCLGEVF_TYPE_CSQ)
+#define hclgevf_ring_to_dma_dir(ring) (hclgevf_is_csq(ring) ? \
+ DMA_TO_DEVICE : DMA_FROM_DEVICE)
+#define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev)
+
+static int hclgevf_ring_space(struct hclgevf_cmq_ring *ring)
+{
+ int ntc = ring->next_to_clean;
+ int ntu = ring->next_to_use;
+ int used;
+
+ used = (ntu - ntc + ring->desc_num) % ring->desc_num;
+
+ return ring->desc_num - used - 1;
+}
+
+static int hclgevf_cmd_csq_clean(struct hclgevf_hw *hw)
+{
+ struct hclgevf_cmq_ring *csq = &hw->cmq.csq;
+ u16 ntc = csq->next_to_clean;
+ struct hclgevf_desc *desc;
+ int clean = 0;
+ u32 head;
+
+ desc = &csq->desc[ntc];
+ head = hclgevf_read_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG);
+ while (head != ntc) {
+ memset(desc, 0, sizeof(*desc));
+ ntc++;
+ if (ntc == csq->desc_num)
+ ntc = 0;
+ desc = &csq->desc[ntc];
+ clean++;
+ }
+ csq->next_to_clean = ntc;
+
+ return clean;
+}
+
+static bool hclgevf_cmd_csq_done(struct hclgevf_hw *hw)
+{
+ u32 head;
+
+ head = hclgevf_read_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG);
+
+ return head == hw->cmq.csq.next_to_use;
+}
+
+static bool hclgevf_is_special_opcode(u16 opcode)
+{
+ u16 spec_opcode[] = {0x30, 0x31, 0x32};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
+ if (spec_opcode[i] == opcode)
+ return true;
+ }
+
+ return false;
+}
+
+static int hclgevf_alloc_cmd_desc(struct hclgevf_cmq_ring *ring)
+{
+ int size = ring->desc_num * sizeof(struct hclgevf_desc);
+
+ ring->desc = kzalloc(size, GFP_KERNEL);
+ if (!ring->desc)
+ return -ENOMEM;
+
+ ring->desc_dma_addr = dma_map_single(cmq_ring_to_dev(ring), ring->desc,
+ size, DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(cmq_ring_to_dev(ring), ring->desc_dma_addr)) {
+ ring->desc_dma_addr = 0;
+ kfree(ring->desc);
+ ring->desc = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hclgevf_free_cmd_desc(struct hclgevf_cmq_ring *ring)
+{
+ dma_unmap_single(cmq_ring_to_dev(ring), ring->desc_dma_addr,
+ ring->desc_num * sizeof(ring->desc[0]),
+ hclgevf_ring_to_dma_dir(ring));
+
+ ring->desc_dma_addr = 0;
+ kfree(ring->desc);
+ ring->desc = NULL;
+}
+
+static int hclgevf_init_cmd_queue(struct hclgevf_dev *hdev,
+ struct hclgevf_cmq_ring *ring)
+{
+ struct hclgevf_hw *hw = &hdev->hw;
+ int ring_type = ring->flag;
+ u32 reg_val;
+ int ret;
+
+ ring->desc_num = HCLGEVF_NIC_CMQ_DESC_NUM;
+ spin_lock_init(&ring->lock);
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+ ring->dev = hdev;
+
+ /* allocate CSQ/CRQ descriptor */
+ ret = hclgevf_alloc_cmd_desc(ring);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "failed(%d) to alloc %s desc\n", ret,
+ (ring_type == HCLGEVF_TYPE_CSQ) ? "CSQ" : "CRQ");
+ return ret;
+ }
+
+ /* initialize the hardware registers with csq/crq dma-address,
+ * descriptor number, head & tail pointers
+ */
+ switch (ring_type) {
+ case HCLGEVF_TYPE_CSQ:
+ reg_val = (u32)ring->desc_dma_addr;
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, reg_val);
+ reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
+
+ reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
+ reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val);
+
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0);
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
+ break;
+ case HCLGEVF_TYPE_CRQ:
+ reg_val = (u32)ring->desc_dma_addr;
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, reg_val);
+ reg_val = (u32)((ring->desc_dma_addr >> 31) >> 1);
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
+
+ reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
+ reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val);
+
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_TAIL_REG, 0);
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0);
+ break;
+ }
+
+ return 0;
+}
+
+void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc,
+ enum hclgevf_opcode_type opcode, bool is_read)
+{
+ memset(desc, 0, sizeof(struct hclgevf_desc));
+ desc->opcode = cpu_to_le16(opcode);
+ desc->flag = cpu_to_le16(HCLGEVF_CMD_FLAG_NO_INTR |
+ HCLGEVF_CMD_FLAG_IN);
+ if (is_read)
+ desc->flag |= cpu_to_le16(HCLGEVF_CMD_FLAG_WR);
+ else
+ desc->flag &= cpu_to_le16(~HCLGEVF_CMD_FLAG_WR);
+}
+
+/* hclgevf_cmd_send - send command to command queue
+ * @hw: pointer to the hw struct
+ * @desc: prefilled descriptor for describing the command
+ * @num : the number of descriptors to be sent
+ *
+ * This is the main send command for command queue, it
+ * sends the queue, cleans the queue, etc
+ */
+int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num)
+{
+ struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev;
+ struct hclgevf_desc *desc_to_use;
+ bool complete = false;
+ u32 timeout = 0;
+ int handle = 0;
+ int status = 0;
+ u16 retval;
+ u16 opcode;
+ int ntc;
+
+ spin_lock_bh(&hw->cmq.csq.lock);
+
+ if (num > hclgevf_ring_space(&hw->cmq.csq)) {
+ spin_unlock_bh(&hw->cmq.csq.lock);
+ return -EBUSY;
+ }
+
+ /* Record the location of desc in the ring for this time
+ * which will be use for hardware to write back
+ */
+ ntc = hw->cmq.csq.next_to_use;
+ opcode = le16_to_cpu(desc[0].opcode);
+ while (handle < num) {
+ desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use];
+ *desc_to_use = desc[handle];
+ (hw->cmq.csq.next_to_use)++;
+ if (hw->cmq.csq.next_to_use == hw->cmq.csq.desc_num)
+ hw->cmq.csq.next_to_use = 0;
+ handle++;
+ }
+
+ /* Write to hardware */
+ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG,
+ hw->cmq.csq.next_to_use);
+
+ /* If the command is sync, wait for the firmware to write back,
+ * if multi descriptors to be sent, use the first one to check
+ */
+ if (HCLGEVF_SEND_SYNC(le16_to_cpu(desc->flag))) {
+ do {
+ if (hclgevf_cmd_csq_done(hw))
+ break;
+ udelay(1);
+ timeout++;
+ } while (timeout < hw->cmq.tx_timeout);
+ }
+
+ if (hclgevf_cmd_csq_done(hw)) {
+ complete = true;
+ handle = 0;
+
+ while (handle < num) {
+ /* Get the result of hardware write back */
+ desc_to_use = &hw->cmq.csq.desc[ntc];
+ desc[handle] = *desc_to_use;
+
+ if (likely(!hclgevf_is_special_opcode(opcode)))
+ retval = le16_to_cpu(desc[handle].retval);
+ else
+ retval = le16_to_cpu(desc[0].retval);
+
+ if ((enum hclgevf_cmd_return_status)retval ==
+ HCLGEVF_CMD_EXEC_SUCCESS)
+ status = 0;
+ else
+ status = -EIO;
+ hw->cmq.last_status = (enum hclgevf_cmd_status)retval;
+ ntc++;
+ handle++;
+ if (ntc == hw->cmq.csq.desc_num)
+ ntc = 0;
+ }
+ }
+
+ if (!complete)
+ status = -EAGAIN;
+
+ /* Clean the command send queue */
+ handle = hclgevf_cmd_csq_clean(hw);
+ if (handle != num) {
+ dev_warn(&hdev->pdev->dev,
+ "cleaned %d, need to clean %d\n", handle, num);
+ }
+
+ spin_unlock_bh(&hw->cmq.csq.lock);
+
+ return status;
+}
+
+static int hclgevf_cmd_query_firmware_version(struct hclgevf_hw *hw,
+ u32 *version)
+{
+ struct hclgevf_query_version_cmd *resp;
+ struct hclgevf_desc desc;
+ int status;
+
+ resp = (struct hclgevf_query_version_cmd *)desc.data;
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_QUERY_FW_VER, 1);
+ status = hclgevf_cmd_send(hw, &desc, 1);
+ if (!status)
+ *version = le32_to_cpu(resp->firmware);
+
+ return status;
+}
+
+int hclgevf_cmd_init(struct hclgevf_dev *hdev)
+{
+ u32 version;
+ int ret;
+
+ /* setup Tx write back timeout */
+ hdev->hw.cmq.tx_timeout = HCLGEVF_CMDQ_TX_TIMEOUT;
+
+ /* setup queue CSQ/CRQ rings */
+ hdev->hw.cmq.csq.flag = HCLGEVF_TYPE_CSQ;
+ ret = hclgevf_init_cmd_queue(hdev, &hdev->hw.cmq.csq);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed(%d) to initialize CSQ ring\n", ret);
+ return ret;
+ }
+
+ hdev->hw.cmq.crq.flag = HCLGEVF_TYPE_CRQ;
+ ret = hclgevf_init_cmd_queue(hdev, &hdev->hw.cmq.crq);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed(%d) to initialize CRQ ring\n", ret);
+ goto err_csq;
+ }
+
+ /* get firmware version */
+ ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed(%d) to query firmware version\n", ret);
+ goto err_crq;
+ }
+ hdev->fw_version = version;
+
+ dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
+
+ return 0;
+err_crq:
+ hclgevf_free_cmd_desc(&hdev->hw.cmq.crq);
+err_csq:
+ hclgevf_free_cmd_desc(&hdev->hw.cmq.csq);
+
+ return ret;
+}
+
+void hclgevf_cmd_uninit(struct hclgevf_dev *hdev)
+{
+ hclgevf_free_cmd_desc(&hdev->hw.cmq.csq);
+ hclgevf_free_cmd_desc(&hdev->hw.cmq.crq);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
new file mode 100644
index 000000000000..ad8adfecbb22
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2016-2017 Hisilicon Limited. */
+
+#ifndef __HCLGEVF_CMD_H
+#define __HCLGEVF_CMD_H
+#include <linux/io.h>
+#include <linux/types.h>
+#include "hnae3.h"
+
+#define HCLGEVF_CMDQ_TX_TIMEOUT 200
+#define HCLGEVF_CMDQ_RX_INVLD_B 0
+#define HCLGEVF_CMDQ_RX_OUTVLD_B 1
+
+struct hclgevf_hw;
+struct hclgevf_dev;
+
+struct hclgevf_desc {
+ __le16 opcode;
+ __le16 flag;
+ __le16 retval;
+ __le16 rsv;
+ __le32 data[6];
+};
+
+struct hclgevf_desc_cb {
+ dma_addr_t dma;
+ void *va;
+ u32 length;
+};
+
+struct hclgevf_cmq_ring {
+ dma_addr_t desc_dma_addr;
+ struct hclgevf_desc *desc;
+ struct hclgevf_desc_cb *desc_cb;
+ struct hclgevf_dev *dev;
+ u32 head;
+ u32 tail;
+
+ u16 buf_size;
+ u16 desc_num;
+ int next_to_use;
+ int next_to_clean;
+ u8 flag;
+ spinlock_t lock; /* Command queue lock */
+};
+
+enum hclgevf_cmd_return_status {
+ HCLGEVF_CMD_EXEC_SUCCESS = 0,
+ HCLGEVF_CMD_NO_AUTH = 1,
+ HCLGEVF_CMD_NOT_EXEC = 2,
+ HCLGEVF_CMD_QUEUE_FULL = 3,
+};
+
+enum hclgevf_cmd_status {
+ HCLGEVF_STATUS_SUCCESS = 0,
+ HCLGEVF_ERR_CSQ_FULL = -1,
+ HCLGEVF_ERR_CSQ_TIMEOUT = -2,
+ HCLGEVF_ERR_CSQ_ERROR = -3
+};
+
+struct hclgevf_cmq {
+ struct hclgevf_cmq_ring csq;
+ struct hclgevf_cmq_ring crq;
+ u16 tx_timeout; /* Tx timeout */
+ enum hclgevf_cmd_status last_status;
+};
+
+#define HCLGEVF_CMD_FLAG_IN_VALID_SHIFT 0
+#define HCLGEVF_CMD_FLAG_OUT_VALID_SHIFT 1
+#define HCLGEVF_CMD_FLAG_NEXT_SHIFT 2
+#define HCLGEVF_CMD_FLAG_WR_OR_RD_SHIFT 3
+#define HCLGEVF_CMD_FLAG_NO_INTR_SHIFT 4
+#define HCLGEVF_CMD_FLAG_ERR_INTR_SHIFT 5
+
+#define HCLGEVF_CMD_FLAG_IN BIT(HCLGEVF_CMD_FLAG_IN_VALID_SHIFT)
+#define HCLGEVF_CMD_FLAG_OUT BIT(HCLGEVF_CMD_FLAG_OUT_VALID_SHIFT)
+#define HCLGEVF_CMD_FLAG_NEXT BIT(HCLGEVF_CMD_FLAG_NEXT_SHIFT)
+#define HCLGEVF_CMD_FLAG_WR BIT(HCLGEVF_CMD_FLAG_WR_OR_RD_SHIFT)
+#define HCLGEVF_CMD_FLAG_NO_INTR BIT(HCLGEVF_CMD_FLAG_NO_INTR_SHIFT)
+#define HCLGEVF_CMD_FLAG_ERR_INTR BIT(HCLGEVF_CMD_FLAG_ERR_INTR_SHIFT)
+
+enum hclgevf_opcode_type {
+ /* Generic command */
+ HCLGEVF_OPC_QUERY_FW_VER = 0x0001,
+ /* TQP command */
+ HCLGEVF_OPC_QUERY_TX_STATUS = 0x0B03,
+ HCLGEVF_OPC_QUERY_RX_STATUS = 0x0B13,
+ HCLGEVF_OPC_CFG_COM_TQP_QUEUE = 0x0B20,
+ /* TSO cmd */
+ HCLGEVF_OPC_TSO_GENERIC_CONFIG = 0x0C01,
+ /* RSS cmd */
+ HCLGEVF_OPC_RSS_GENERIC_CONFIG = 0x0D01,
+ HCLGEVF_OPC_RSS_INDIR_TABLE = 0x0D07,
+ HCLGEVF_OPC_RSS_TC_MODE = 0x0D08,
+ /* Mailbox cmd */
+ HCLGEVF_OPC_MBX_VF_TO_PF = 0x2001,
+};
+
+#define HCLGEVF_TQP_REG_OFFSET 0x80000
+#define HCLGEVF_TQP_REG_SIZE 0x200
+
+struct hclgevf_tqp_map {
+ __le16 tqp_id; /* Absolute tqp id for in this pf */
+ u8 tqp_vf; /* VF id */
+#define HCLGEVF_TQP_MAP_TYPE_PF 0
+#define HCLGEVF_TQP_MAP_TYPE_VF 1
+#define HCLGEVF_TQP_MAP_TYPE_B 0
+#define HCLGEVF_TQP_MAP_EN_B 1
+ u8 tqp_flag; /* Indicate it's pf or vf tqp */
+ __le16 tqp_vid; /* Virtual id in this pf/vf */
+ u8 rsv[18];
+};
+
+#define HCLGEVF_VECTOR_ELEMENTS_PER_CMD 10
+
+enum hclgevf_int_type {
+ HCLGEVF_INT_TX = 0,
+ HCLGEVF_INT_RX,
+ HCLGEVF_INT_EVENT,
+};
+
+struct hclgevf_ctrl_vector_chain {
+ u8 int_vector_id;
+ u8 int_cause_num;
+#define HCLGEVF_INT_TYPE_S 0
+#define HCLGEVF_INT_TYPE_M 0x3
+#define HCLGEVF_TQP_ID_S 2
+#define HCLGEVF_TQP_ID_M (0x3fff << HCLGEVF_TQP_ID_S)
+ __le16 tqp_type_and_id[HCLGEVF_VECTOR_ELEMENTS_PER_CMD];
+ u8 vfid;
+ u8 resv;
+};
+
+struct hclgevf_query_version_cmd {
+ __le32 firmware;
+ __le32 firmware_rsv[5];
+};
+
+#define HCLGEVF_RSS_HASH_KEY_OFFSET 4
+#define HCLGEVF_RSS_HASH_KEY_NUM 16
+struct hclgevf_rss_config_cmd {
+ u8 hash_config;
+ u8 rsv[7];
+ u8 hash_key[HCLGEVF_RSS_HASH_KEY_NUM];
+};
+
+struct hclgevf_rss_input_tuple_cmd {
+ u8 ipv4_tcp_en;
+ u8 ipv4_udp_en;
+ u8 ipv4_stcp_en;
+ u8 ipv4_fragment_en;
+ u8 ipv6_tcp_en;
+ u8 ipv6_udp_en;
+ u8 ipv6_stcp_en;
+ u8 ipv6_fragment_en;
+ u8 rsv[16];
+};
+
+#define HCLGEVF_RSS_CFG_TBL_SIZE 16
+
+struct hclgevf_rss_indirection_table_cmd {
+ u16 start_table_index;
+ u16 rss_set_bitmap;
+ u8 rsv[4];
+ u8 rss_result[HCLGEVF_RSS_CFG_TBL_SIZE];
+};
+
+#define HCLGEVF_RSS_TC_OFFSET_S 0
+#define HCLGEVF_RSS_TC_OFFSET_M (0x3ff << HCLGEVF_RSS_TC_OFFSET_S)
+#define HCLGEVF_RSS_TC_SIZE_S 12
+#define HCLGEVF_RSS_TC_SIZE_M (0x7 << HCLGEVF_RSS_TC_SIZE_S)
+#define HCLGEVF_RSS_TC_VALID_B 15
+#define HCLGEVF_MAX_TC_NUM 8
+struct hclgevf_rss_tc_mode_cmd {
+ u16 rss_tc_mode[HCLGEVF_MAX_TC_NUM];
+ u8 rsv[8];
+};
+
+#define HCLGEVF_LINK_STS_B 0
+#define HCLGEVF_LINK_STATUS BIT(HCLGEVF_LINK_STS_B)
+struct hclgevf_link_status_cmd {
+ u8 status;
+ u8 rsv[23];
+};
+
+#define HCLGEVF_RING_ID_MASK 0x3ff
+#define HCLGEVF_TQP_ENABLE_B 0
+
+struct hclgevf_cfg_com_tqp_queue_cmd {
+ __le16 tqp_id;
+ __le16 stream_id;
+ u8 enable;
+ u8 rsv[19];
+};
+
+struct hclgevf_cfg_tx_queue_pointer_cmd {
+ __le16 tqp_id;
+ __le16 tx_tail;
+ __le16 tx_head;
+ __le16 fbd_num;
+ __le16 ring_offset;
+ u8 rsv[14];
+};
+
+#define HCLGEVF_TSO_ENABLE_B 0
+struct hclgevf_cfg_tso_status_cmd {
+ u8 tso_enable;
+ u8 rsv[23];
+};
+
+#define HCLGEVF_TYPE_CRQ 0
+#define HCLGEVF_TYPE_CSQ 1
+#define HCLGEVF_NIC_CSQ_BASEADDR_L_REG 0x27000
+#define HCLGEVF_NIC_CSQ_BASEADDR_H_REG 0x27004
+#define HCLGEVF_NIC_CSQ_DEPTH_REG 0x27008
+#define HCLGEVF_NIC_CSQ_TAIL_REG 0x27010
+#define HCLGEVF_NIC_CSQ_HEAD_REG 0x27014
+#define HCLGEVF_NIC_CRQ_BASEADDR_L_REG 0x27018
+#define HCLGEVF_NIC_CRQ_BASEADDR_H_REG 0x2701c
+#define HCLGEVF_NIC_CRQ_DEPTH_REG 0x27020
+#define HCLGEVF_NIC_CRQ_TAIL_REG 0x27024
+#define HCLGEVF_NIC_CRQ_HEAD_REG 0x27028
+#define HCLGEVF_NIC_CMQ_EN_B 16
+#define HCLGEVF_NIC_CMQ_ENABLE BIT(HCLGEVF_NIC_CMQ_EN_B)
+#define HCLGEVF_NIC_CMQ_DESC_NUM 1024
+#define HCLGEVF_NIC_CMQ_DESC_NUM_S 3
+#define HCLGEVF_NIC_CMDQ_INT_SRC_REG 0x27100
+
+static inline void hclgevf_write_reg(void __iomem *base, u32 reg, u32 value)
+{
+ writel(value, base + reg);
+}
+
+static inline u32 hclgevf_read_reg(u8 __iomem *base, u32 reg)
+{
+ u8 __iomem *reg_addr = READ_ONCE(base);
+
+ return readl(reg_addr + reg);
+}
+
+#define hclgevf_write_dev(a, reg, value) \
+ hclgevf_write_reg((a)->io_base, (reg), (value))
+#define hclgevf_read_dev(a, reg) \
+ hclgevf_read_reg((a)->io_base, (reg))
+
+#define HCLGEVF_SEND_SYNC(flag) \
+ ((flag) & HCLGEVF_CMD_FLAG_NO_INTR)
+
+int hclgevf_cmd_init(struct hclgevf_dev *hdev);
+void hclgevf_cmd_uninit(struct hclgevf_dev *hdev);
+
+int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num);
+void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc,
+ enum hclgevf_opcode_type opcode,
+ bool is_read);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
new file mode 100644
index 000000000000..31866057bc7d
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -0,0 +1,1490 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
+
+#include <linux/etherdevice.h>
+#include "hclgevf_cmd.h"
+#include "hclgevf_main.h"
+#include "hclge_mbx.h"
+#include "hnae3.h"
+
+#define HCLGEVF_NAME "hclgevf"
+
+static struct hnae3_ae_algo ae_algovf;
+
+static const struct pci_device_id ae_algovf_pci_tbl[] = {
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0},
+ /* required last entry */
+ {0, }
+};
+
+static inline struct hclgevf_dev *hclgevf_ae_get_hdev(
+ struct hnae3_handle *handle)
+{
+ return container_of(handle, struct hclgevf_dev, nic);
+}
+
+static int hclgevf_tqps_update_stats(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hnae3_queue *queue;
+ struct hclgevf_desc desc;
+ struct hclgevf_tqp *tqp;
+ int status;
+ int i;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ queue = handle->kinfo.tqp[i];
+ tqp = container_of(queue, struct hclgevf_tqp, q);
+ hclgevf_cmd_setup_basic_desc(&desc,
+ HCLGEVF_OPC_QUERY_RX_STATUS,
+ true);
+
+ desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "Query tqp stat fail, status = %d,queue = %d\n",
+ status, i);
+ return status;
+ }
+ tqp->tqp_stats.rcb_rx_ring_pktnum_rcd +=
+ le32_to_cpu(desc.data[4]);
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_QUERY_TX_STATUS,
+ true);
+
+ desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "Query tqp stat fail, status = %d,queue = %d\n",
+ status, i);
+ return status;
+ }
+ tqp->tqp_stats.rcb_tx_ring_pktnum_rcd +=
+ le32_to_cpu(desc.data[4]);
+ }
+
+ return 0;
+}
+
+static u64 *hclgevf_tqps_get_stats(struct hnae3_handle *handle, u64 *data)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hclgevf_tqp *tqp;
+ u64 *buff = data;
+ int i;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ tqp = container_of(handle->kinfo.tqp[i], struct hclgevf_tqp, q);
+ *buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd;
+ }
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ tqp = container_of(handle->kinfo.tqp[i], struct hclgevf_tqp, q);
+ *buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd;
+ }
+
+ return buff;
+}
+
+static int hclgevf_tqps_get_sset_count(struct hnae3_handle *handle, int strset)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hdev->num_tqps * 2;
+}
+
+static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ u8 *buff = data;
+ int i = 0;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ struct hclgevf_tqp *tqp = container_of(handle->kinfo.tqp[i],
+ struct hclgevf_tqp, q);
+ snprintf(buff, ETH_GSTRING_LEN, "rcb_q%d_tx_pktnum_rcd",
+ tqp->index);
+ buff += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ struct hclgevf_tqp *tqp = container_of(handle->kinfo.tqp[i],
+ struct hclgevf_tqp, q);
+ snprintf(buff, ETH_GSTRING_LEN, "rcb_q%d_rx_pktnum_rcd",
+ tqp->index);
+ buff += ETH_GSTRING_LEN;
+ }
+
+ return buff;
+}
+
+static void hclgevf_update_stats(struct hnae3_handle *handle,
+ struct net_device_stats *net_stats)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ int status;
+
+ status = hclgevf_tqps_update_stats(handle);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "VF update of TQPS stats fail, status = %d.\n",
+ status);
+}
+
+static int hclgevf_get_sset_count(struct hnae3_handle *handle, int strset)
+{
+ if (strset == ETH_SS_TEST)
+ return -EOPNOTSUPP;
+ else if (strset == ETH_SS_STATS)
+ return hclgevf_tqps_get_sset_count(handle, strset);
+
+ return 0;
+}
+
+static void hclgevf_get_strings(struct hnae3_handle *handle, u32 strset,
+ u8 *data)
+{
+ u8 *p = (char *)data;
+
+ if (strset == ETH_SS_STATS)
+ p = hclgevf_tqps_get_strings(handle, p);
+}
+
+static void hclgevf_get_stats(struct hnae3_handle *handle, u64 *data)
+{
+ hclgevf_tqps_get_stats(handle, data);
+}
+
+static int hclgevf_get_tc_info(struct hclgevf_dev *hdev)
+{
+ u8 resp_msg;
+ int status;
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_TCINFO, 0, NULL, 0,
+ true, &resp_msg, sizeof(u8));
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "VF request to get TC info from PF failed %d",
+ status);
+ return status;
+ }
+
+ hdev->hw_tc_map = resp_msg;
+
+ return 0;
+}
+
+static int hclge_get_queue_info(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_TQPS_RSS_INFO_LEN 8
+ u8 resp_msg[HCLGEVF_TQPS_RSS_INFO_LEN];
+ int status;
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QINFO, 0, NULL, 0,
+ true, resp_msg,
+ HCLGEVF_TQPS_RSS_INFO_LEN);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "VF request to get tqp info from PF failed %d",
+ status);
+ return status;
+ }
+
+ memcpy(&hdev->num_tqps, &resp_msg[0], sizeof(u16));
+ memcpy(&hdev->rss_size_max, &resp_msg[2], sizeof(u16));
+ memcpy(&hdev->num_desc, &resp_msg[4], sizeof(u16));
+ memcpy(&hdev->rx_buf_len, &resp_msg[6], sizeof(u16));
+
+ return 0;
+}
+
+static int hclgevf_enable_tso(struct hclgevf_dev *hdev, int enable)
+{
+ struct hclgevf_cfg_tso_status_cmd *req;
+ struct hclgevf_desc desc;
+
+ req = (struct hclgevf_cfg_tso_status_cmd *)desc.data;
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_TSO_GENERIC_CONFIG,
+ false);
+ hnae_set_bit(req->tso_enable, HCLGEVF_TSO_ENABLE_B, enable);
+
+ return hclgevf_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
+{
+ struct hclgevf_tqp *tqp;
+ int i;
+
+ hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
+ sizeof(struct hclgevf_tqp), GFP_KERNEL);
+ if (!hdev->htqp)
+ return -ENOMEM;
+
+ tqp = hdev->htqp;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ tqp->dev = &hdev->pdev->dev;
+ tqp->index = i;
+
+ tqp->q.ae_algo = &ae_algovf;
+ tqp->q.buf_size = hdev->rx_buf_len;
+ tqp->q.desc_num = hdev->num_desc;
+ tqp->q.io_base = hdev->hw.io_base + HCLGEVF_TQP_REG_OFFSET +
+ i * HCLGEVF_TQP_REG_SIZE;
+
+ tqp++;
+ }
+
+ return 0;
+}
+
+static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
+{
+ struct hnae3_handle *nic = &hdev->nic;
+ struct hnae3_knic_private_info *kinfo;
+ u16 new_tqps = hdev->num_tqps;
+ int i;
+
+ kinfo = &nic->kinfo;
+ kinfo->num_tc = 0;
+ kinfo->num_desc = hdev->num_desc;
+ kinfo->rx_buf_len = hdev->rx_buf_len;
+ for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++)
+ if (hdev->hw_tc_map & BIT(i))
+ kinfo->num_tc++;
+
+ kinfo->rss_size
+ = min_t(u16, hdev->rss_size_max, new_tqps / kinfo->num_tc);
+ new_tqps = kinfo->rss_size * kinfo->num_tc;
+ kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
+
+ kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
+ sizeof(struct hnae3_queue *), GFP_KERNEL);
+ if (!kinfo->tqp)
+ return -ENOMEM;
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ hdev->htqp[i].q.handle = &hdev->nic;
+ hdev->htqp[i].q.tqp_index = i;
+ kinfo->tqp[i] = &hdev->htqp[i].q;
+ }
+
+ return 0;
+}
+
+static void hclgevf_request_link_info(struct hclgevf_dev *hdev)
+{
+ int status;
+ u8 resp_msg;
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_STATUS, 0, NULL,
+ 0, false, &resp_msg, sizeof(u8));
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "VF failed to fetch link status(%d) from PF", status);
+}
+
+void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state)
+{
+ struct hnae3_handle *handle = &hdev->nic;
+ struct hnae3_client *client;
+
+ client = handle->client;
+
+ if (link_state != hdev->hw.mac.link) {
+ client->ops->link_status_change(handle, !!link_state);
+ hdev->hw.mac.link = link_state;
+ }
+}
+
+static int hclgevf_set_handle_info(struct hclgevf_dev *hdev)
+{
+ struct hnae3_handle *nic = &hdev->nic;
+ int ret;
+
+ nic->ae_algo = &ae_algovf;
+ nic->pdev = hdev->pdev;
+ nic->numa_node_mask = hdev->numa_node_mask;
+ nic->flags |= HNAE3_SUPPORT_VF;
+
+ if (hdev->ae_dev->dev_type != HNAE3_DEV_KNIC) {
+ dev_err(&hdev->pdev->dev, "unsupported device type %d\n",
+ hdev->ae_dev->dev_type);
+ return -EINVAL;
+ }
+
+ ret = hclgevf_knic_setup(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "VF knic setup failed %d\n",
+ ret);
+ return ret;
+}
+
+static void hclgevf_free_vector(struct hclgevf_dev *hdev, int vector_id)
+{
+ hdev->vector_status[vector_id] = HCLGEVF_INVALID_VPORT;
+ hdev->num_msi_left += 1;
+ hdev->num_msi_used -= 1;
+}
+
+static int hclgevf_get_vector(struct hnae3_handle *handle, u16 vector_num,
+ struct hnae3_vector_info *vector_info)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hnae3_vector_info *vector = vector_info;
+ int alloc = 0;
+ int i, j;
+
+ vector_num = min(hdev->num_msi_left, vector_num);
+
+ for (j = 0; j < vector_num; j++) {
+ for (i = HCLGEVF_MISC_VECTOR_NUM + 1; i < hdev->num_msi; i++) {
+ if (hdev->vector_status[i] == HCLGEVF_INVALID_VPORT) {
+ vector->vector = pci_irq_vector(hdev->pdev, i);
+ vector->io_addr = hdev->hw.io_base +
+ HCLGEVF_VECTOR_REG_BASE +
+ (i - 1) * HCLGEVF_VECTOR_REG_OFFSET;
+ hdev->vector_status[i] = 0;
+ hdev->vector_irq[i] = vector->vector;
+
+ vector++;
+ alloc++;
+
+ break;
+ }
+ }
+ }
+ hdev->num_msi_left -= alloc;
+ hdev->num_msi_used += alloc;
+
+ return alloc;
+}
+
+static int hclgevf_get_vector_index(struct hclgevf_dev *hdev, int vector)
+{
+ int i;
+
+ for (i = 0; i < hdev->num_msi; i++)
+ if (vector == hdev->vector_irq[i])
+ return i;
+
+ return -EINVAL;
+}
+
+static u32 hclgevf_get_rss_key_size(struct hnae3_handle *handle)
+{
+ return HCLGEVF_RSS_KEY_SIZE;
+}
+
+static u32 hclgevf_get_rss_indir_size(struct hnae3_handle *handle)
+{
+ return HCLGEVF_RSS_IND_TBL_SIZE;
+}
+
+static int hclgevf_set_rss_indir_table(struct hclgevf_dev *hdev)
+{
+ const u8 *indir = hdev->rss_cfg.rss_indirection_tbl;
+ struct hclgevf_rss_indirection_table_cmd *req;
+ struct hclgevf_desc desc;
+ int status;
+ int i, j;
+
+ req = (struct hclgevf_rss_indirection_table_cmd *)desc.data;
+
+ for (i = 0; i < HCLGEVF_RSS_CFG_TBL_NUM; i++) {
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INDIR_TABLE,
+ false);
+ req->start_table_index = i * HCLGEVF_RSS_CFG_TBL_SIZE;
+ req->rss_set_bitmap = HCLGEVF_RSS_SET_BITMAP_MSK;
+ for (j = 0; j < HCLGEVF_RSS_CFG_TBL_SIZE; j++)
+ req->rss_result[j] =
+ indir[i * HCLGEVF_RSS_CFG_TBL_SIZE + j];
+
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to set RSS indirection table\n",
+ status);
+ return status;
+ }
+ }
+
+ return 0;
+}
+
+static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size)
+{
+ struct hclgevf_rss_tc_mode_cmd *req;
+ u16 tc_offset[HCLGEVF_MAX_TC_NUM];
+ u16 tc_valid[HCLGEVF_MAX_TC_NUM];
+ u16 tc_size[HCLGEVF_MAX_TC_NUM];
+ struct hclgevf_desc desc;
+ u16 roundup_size;
+ int status;
+ int i;
+
+ req = (struct hclgevf_rss_tc_mode_cmd *)desc.data;
+
+ roundup_size = roundup_pow_of_two(rss_size);
+ roundup_size = ilog2(roundup_size);
+
+ for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
+ tc_valid[i] = !!(hdev->hw_tc_map & BIT(i));
+ tc_size[i] = roundup_size;
+ tc_offset[i] = rss_size * i;
+ }
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false);
+ for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
+ hnae_set_bit(req->rss_tc_mode[i], HCLGEVF_RSS_TC_VALID_B,
+ (tc_valid[i] & 0x1));
+ hnae_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_SIZE_M,
+ HCLGEVF_RSS_TC_SIZE_S, tc_size[i]);
+ hnae_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_OFFSET_M,
+ HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
+ }
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to set rss tc mode\n", status);
+
+ return status;
+}
+
+static int hclgevf_get_rss_hw_cfg(struct hnae3_handle *handle, u8 *hash,
+ u8 *key)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hclgevf_rss_config_cmd *req;
+ int lkup_times = key ? 3 : 1;
+ struct hclgevf_desc desc;
+ int key_offset;
+ int key_size;
+ int status;
+
+ req = (struct hclgevf_rss_config_cmd *)desc.data;
+ lkup_times = (lkup_times == 3) ? 3 : ((hash) ? 1 : 0);
+
+ for (key_offset = 0; key_offset < lkup_times; key_offset++) {
+ hclgevf_cmd_setup_basic_desc(&desc,
+ HCLGEVF_OPC_RSS_GENERIC_CONFIG,
+ true);
+ req->hash_config |= (key_offset << HCLGEVF_RSS_HASH_KEY_OFFSET);
+
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "failed to get hardware RSS cfg, status = %d\n",
+ status);
+ return status;
+ }
+
+ if (key_offset == 2)
+ key_size =
+ HCLGEVF_RSS_KEY_SIZE - HCLGEVF_RSS_HASH_KEY_NUM * 2;
+ else
+ key_size = HCLGEVF_RSS_HASH_KEY_NUM;
+
+ if (key)
+ memcpy(key + key_offset * HCLGEVF_RSS_HASH_KEY_NUM,
+ req->hash_key,
+ key_size);
+ }
+
+ if (hash) {
+ if ((req->hash_config & 0xf) == HCLGEVF_RSS_HASH_ALGO_TOEPLITZ)
+ *hash = ETH_RSS_HASH_TOP;
+ else
+ *hash = ETH_RSS_HASH_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static int hclgevf_get_rss(struct hnae3_handle *handle, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+ int i;
+
+ if (indir)
+ for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
+ indir[i] = rss_cfg->rss_indirection_tbl[i];
+
+ return hclgevf_get_rss_hw_cfg(handle, hfunc, key);
+}
+
+static int hclgevf_set_rss(struct hnae3_handle *handle, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+ int i;
+
+ /* update the shadow RSS table with user specified qids */
+ for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
+ rss_cfg->rss_indirection_tbl[i] = indir[i];
+
+ /* update the hardware */
+ return hclgevf_set_rss_indir_table(hdev);
+}
+
+static int hclgevf_get_tc_size(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+
+ return rss_cfg->rss_size;
+}
+
+static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
+ int vector,
+ struct hnae3_ring_chain_node *ring_chain)
+{
+#define HCLGEVF_RING_NODE_VARIABLE_NUM 3
+#define HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM 3
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hnae3_ring_chain_node *node;
+ struct hclge_mbx_vf_to_pf_cmd *req;
+ struct hclgevf_desc desc;
+ int i, vector_id;
+ int status;
+ u8 type;
+
+ req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
+ vector_id = hclgevf_get_vector_index(hdev, vector);
+ if (vector_id < 0) {
+ dev_err(&handle->pdev->dev,
+ "Get vector index fail. ret =%d\n", vector_id);
+ return vector_id;
+ }
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
+ type = en ?
+ HCLGE_MBX_MAP_RING_TO_VECTOR : HCLGE_MBX_UNMAP_RING_TO_VECTOR;
+ req->msg[0] = type;
+ req->msg[1] = vector_id; /* vector_id should be id in VF */
+
+ i = 0;
+ for (node = ring_chain; node; node = node->next) {
+ i++;
+ /* msg[2] is cause num */
+ req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i] =
+ hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
+ req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
+ node->tqp_index;
+ if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
+ HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
+ HCLGEVF_RING_NODE_VARIABLE_NUM) {
+ req->msg[2] = i;
+
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "Map TQP fail, status is %d.\n",
+ status);
+ return status;
+ }
+ i = 0;
+ hclgevf_cmd_setup_basic_desc(&desc,
+ HCLGEVF_OPC_MBX_VF_TO_PF,
+ false);
+ req->msg[0] = type;
+ req->msg[1] = vector_id;
+ }
+ }
+
+ if (i > 0) {
+ req->msg[2] = i;
+
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "Map TQP fail, status is %d.\n", status);
+ return status;
+ }
+ }
+
+ return 0;
+}
+
+static int hclgevf_map_ring_to_vector(struct hnae3_handle *handle, int vector,
+ struct hnae3_ring_chain_node *ring_chain)
+{
+ return hclgevf_bind_ring_to_vector(handle, true, vector, ring_chain);
+}
+
+static int hclgevf_unmap_ring_from_vector(
+ struct hnae3_handle *handle,
+ int vector,
+ struct hnae3_ring_chain_node *ring_chain)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ int ret, vector_id;
+
+ vector_id = hclgevf_get_vector_index(hdev, vector);
+ if (vector_id < 0) {
+ dev_err(&handle->pdev->dev,
+ "Get vector index fail. ret =%d\n", vector_id);
+ return vector_id;
+ }
+
+ ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
+ if (ret) {
+ dev_err(&handle->pdev->dev,
+ "Unmap ring from vector fail. vector=%d, ret =%d\n",
+ vector_id,
+ ret);
+ return ret;
+ }
+
+ hclgevf_free_vector(hdev, vector);
+
+ return 0;
+}
+
+static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, u32 en)
+{
+ struct hclge_mbx_vf_to_pf_cmd *req;
+ struct hclgevf_desc desc;
+ int status;
+
+ req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
+ req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE;
+ req->msg[1] = en;
+
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Set promisc mode fail, status is %d.\n", status);
+
+ return status;
+}
+
+static void hclgevf_set_promisc_mode(struct hnae3_handle *handle, u32 en)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ hclgevf_cmd_set_promisc_mode(hdev, en);
+}
+
+static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
+ int stream_id, bool enable)
+{
+ struct hclgevf_cfg_com_tqp_queue_cmd *req;
+ struct hclgevf_desc desc;
+ int status;
+
+ req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data;
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_CFG_COM_TQP_QUEUE,
+ false);
+ req->tqp_id = cpu_to_le16(tqp_id & HCLGEVF_RING_ID_MASK);
+ req->stream_id = cpu_to_le16(stream_id);
+ req->enable |= enable << HCLGEVF_TQP_ENABLE_B;
+
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "TQP enable fail, status =%d.\n", status);
+
+ return status;
+}
+
+static int hclgevf_get_queue_id(struct hnae3_queue *queue)
+{
+ struct hclgevf_tqp *tqp = container_of(queue, struct hclgevf_tqp, q);
+
+ return tqp->index;
+}
+
+static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hnae3_queue *queue;
+ struct hclgevf_tqp *tqp;
+ int i;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ queue = handle->kinfo.tqp[i];
+ tqp = container_of(queue, struct hclgevf_tqp, q);
+ memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats));
+ }
+}
+
+static int hclgevf_cfg_func_mta_filter(struct hnae3_handle *handle, bool en)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ u8 msg[2] = {0};
+
+ msg[0] = en;
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST,
+ HCLGE_MBX_MAC_VLAN_MC_FUNC_MTA_ENABLE,
+ msg, 1, false, NULL, 0);
+}
+
+static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ ether_addr_copy(p, hdev->hw.mac.mac_addr);
+}
+
+static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ u8 *old_mac_addr = (u8 *)hdev->hw.mac.mac_addr;
+ u8 *new_mac_addr = (u8 *)p;
+ u8 msg_data[ETH_ALEN * 2];
+ int status;
+
+ ether_addr_copy(msg_data, new_mac_addr);
+ ether_addr_copy(&msg_data[ETH_ALEN], old_mac_addr);
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
+ HCLGE_MBX_MAC_VLAN_UC_MODIFY,
+ msg_data, ETH_ALEN * 2,
+ false, NULL, 0);
+ if (!status)
+ ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
+
+ return status;
+}
+
+static int hclgevf_add_uc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
+ HCLGE_MBX_MAC_VLAN_UC_ADD,
+ addr, ETH_ALEN, false, NULL, 0);
+}
+
+static int hclgevf_rm_uc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
+ HCLGE_MBX_MAC_VLAN_UC_REMOVE,
+ addr, ETH_ALEN, false, NULL, 0);
+}
+
+static int hclgevf_add_mc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST,
+ HCLGE_MBX_MAC_VLAN_MC_ADD,
+ addr, ETH_ALEN, false, NULL, 0);
+}
+
+static int hclgevf_rm_mc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_MULTICAST,
+ HCLGE_MBX_MAC_VLAN_MC_REMOVE,
+ addr, ETH_ALEN, false, NULL, 0);
+}
+
+static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
+ __be16 proto, u16 vlan_id,
+ bool is_kill)
+{
+#define HCLGEVF_VLAN_MBX_MSG_LEN 5
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ u8 msg_data[HCLGEVF_VLAN_MBX_MSG_LEN];
+
+ if (vlan_id > 4095)
+ return -EINVAL;
+
+ if (proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ msg_data[0] = is_kill;
+ memcpy(&msg_data[1], &vlan_id, sizeof(vlan_id));
+ memcpy(&msg_data[3], &proto, sizeof(proto));
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+ HCLGE_MBX_VLAN_FILTER, msg_data,
+ HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+}
+
+static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ u8 msg_data[2];
+
+ memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
+
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, 2, false,
+ NULL, 0);
+}
+
+static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ return hdev->fw_version;
+}
+
+static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
+{
+ struct hclgevf_misc_vector *vector = &hdev->misc_vector;
+
+ vector->vector_irq = pci_irq_vector(hdev->pdev,
+ HCLGEVF_MISC_VECTOR_NUM);
+ vector->addr = hdev->hw.io_base + HCLGEVF_MISC_VECTOR_REG_BASE;
+ /* vector status always valid for Vector 0 */
+ hdev->vector_status[HCLGEVF_MISC_VECTOR_NUM] = 0;
+ hdev->vector_irq[HCLGEVF_MISC_VECTOR_NUM] = vector->vector_irq;
+
+ hdev->num_msi_left -= 1;
+ hdev->num_msi_used += 1;
+}
+
+static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+{
+ if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ schedule_work(&hdev->mbx_service_task);
+}
+
+static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
+{
+ if (!test_bit(HCLGEVF_STATE_DOWN, &hdev->state) &&
+ !test_and_set_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state))
+ schedule_work(&hdev->service_task);
+}
+
+static void hclgevf_service_timer(struct timer_list *t)
+{
+ struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
+
+ mod_timer(&hdev->service_timer, jiffies + 5 * HZ);
+
+ hclgevf_task_schedule(hdev);
+}
+
+static void hclgevf_mailbox_service_task(struct work_struct *work)
+{
+ struct hclgevf_dev *hdev;
+
+ hdev = container_of(work, struct hclgevf_dev, mbx_service_task);
+
+ if (test_and_set_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state))
+ return;
+
+ clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
+
+ hclgevf_mbx_handler(hdev);
+
+ clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
+}
+
+static void hclgevf_service_task(struct work_struct *work)
+{
+ struct hclgevf_dev *hdev;
+
+ hdev = container_of(work, struct hclgevf_dev, service_task);
+
+ /* request the link status from the PF. PF would be able to tell VF
+ * about such updates in future so we might remove this later
+ */
+ hclgevf_request_link_info(hdev);
+
+ clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+}
+
+static void hclgevf_clear_event_cause(struct hclgevf_dev *hdev, u32 regclr)
+{
+ hclgevf_write_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_SRC_REG, regclr);
+}
+
+static bool hclgevf_check_event_cause(struct hclgevf_dev *hdev, u32 *clearval)
+{
+ u32 cmdq_src_reg;
+
+ /* fetch the events from their corresponding regs */
+ cmdq_src_reg = hclgevf_read_dev(&hdev->hw,
+ HCLGEVF_VECTOR0_CMDQ_SRC_REG);
+
+ /* check for vector0 mailbox(=CMDQ RX) event source */
+ if (BIT(HCLGEVF_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
+ cmdq_src_reg &= ~BIT(HCLGEVF_VECTOR0_RX_CMDQ_INT_B);
+ *clearval = cmdq_src_reg;
+ return true;
+ }
+
+ dev_dbg(&hdev->pdev->dev, "vector 0 interrupt from unknown source\n");
+
+ return false;
+}
+
+static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en)
+{
+ writel(en ? 1 : 0, vector->addr);
+}
+
+static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
+{
+ struct hclgevf_dev *hdev = data;
+ u32 clearval;
+
+ hclgevf_enable_vector(&hdev->misc_vector, false);
+ if (!hclgevf_check_event_cause(hdev, &clearval))
+ goto skip_sched;
+
+ /* schedule the VF mailbox service task, if not already scheduled */
+ hclgevf_mbx_task_schedule(hdev);
+
+ hclgevf_clear_event_cause(hdev, clearval);
+
+skip_sched:
+ hclgevf_enable_vector(&hdev->misc_vector, true);
+
+ return IRQ_HANDLED;
+}
+
+static int hclgevf_configure(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ /* get queue configuration from PF */
+ ret = hclge_get_queue_info(hdev);
+ if (ret)
+ return ret;
+ /* get tc configuration from PF */
+ return hclgevf_get_tc_info(hdev);
+}
+
+static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
+{
+ struct hnae3_handle *roce = &hdev->roce;
+ struct hnae3_handle *nic = &hdev->nic;
+
+ roce->rinfo.num_vectors = HCLGEVF_ROCEE_VECTOR_NUM;
+
+ if (hdev->num_msi_left < roce->rinfo.num_vectors ||
+ hdev->num_msi_left == 0)
+ return -EINVAL;
+
+ roce->rinfo.base_vector =
+ hdev->vector_status[hdev->num_msi_used];
+
+ roce->rinfo.netdev = nic->kinfo.netdev;
+ roce->rinfo.roce_io_base = hdev->hw.io_base;
+
+ roce->pdev = nic->pdev;
+ roce->ae_algo = nic->ae_algo;
+ roce->numa_node_mask = nic->numa_node_mask;
+
+ return 0;
+}
+
+static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev)
+{
+ struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg;
+ int i, ret;
+
+ rss_cfg->rss_size = hdev->rss_size_max;
+
+ /* Initialize RSS indirect table for each vport */
+ for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++)
+ rss_cfg->rss_indirection_tbl[i] = i % hdev->rss_size_max;
+
+ ret = hclgevf_set_rss_indir_table(hdev);
+ if (ret)
+ return ret;
+
+ return hclgevf_set_rss_tc_mode(hdev, hdev->rss_size_max);
+}
+
+static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev)
+{
+ /* other vlan config(like, VLAN TX/RX offload) would also be added
+ * here later
+ */
+ return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0,
+ false);
+}
+
+static int hclgevf_ae_start(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ int i, queue_id;
+
+ for (i = 0; i < handle->kinfo.num_tqps; i++) {
+ /* ring enable */
+ queue_id = hclgevf_get_queue_id(handle->kinfo.tqp[i]);
+ if (queue_id < 0) {
+ dev_warn(&hdev->pdev->dev,
+ "Get invalid queue id, ignore it\n");
+ continue;
+ }
+
+ hclgevf_tqp_enable(hdev, queue_id, 0, true);
+ }
+
+ /* reset tqp stats */
+ hclgevf_reset_tqp_stats(handle);
+
+ hclgevf_request_link_info(hdev);
+
+ clear_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+ mod_timer(&hdev->service_timer, jiffies + HZ);
+
+ return 0;
+}
+
+static void hclgevf_ae_stop(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ int i, queue_id;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ /* Ring disable */
+ queue_id = hclgevf_get_queue_id(handle->kinfo.tqp[i]);
+ if (queue_id < 0) {
+ dev_warn(&hdev->pdev->dev,
+ "Get invalid queue id, ignore it\n");
+ continue;
+ }
+
+ hclgevf_tqp_enable(hdev, queue_id, 0, false);
+ }
+
+ /* reset tqp stats */
+ hclgevf_reset_tqp_stats(handle);
+}
+
+static void hclgevf_state_init(struct hclgevf_dev *hdev)
+{
+ /* setup tasks for the MBX */
+ INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
+ clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
+ clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
+
+ /* setup tasks for service timer */
+ timer_setup(&hdev->service_timer, hclgevf_service_timer, 0);
+
+ INIT_WORK(&hdev->service_task, hclgevf_service_task);
+ clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+
+ mutex_init(&hdev->mbx_resp.mbx_mutex);
+
+ /* bring the device down */
+ set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+}
+
+static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
+{
+ set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+
+ if (hdev->service_timer.function)
+ del_timer_sync(&hdev->service_timer);
+ if (hdev->service_task.func)
+ cancel_work_sync(&hdev->service_task);
+ if (hdev->mbx_service_task.func)
+ cancel_work_sync(&hdev->mbx_service_task);
+
+ mutex_destroy(&hdev->mbx_resp.mbx_mutex);
+}
+
+static int hclgevf_init_msi(struct hclgevf_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int vectors;
+ int i;
+
+ hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
+
+ vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+ PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (vectors < 0) {
+ dev_err(&pdev->dev,
+ "failed(%d) to allocate MSI/MSI-X vectors\n",
+ vectors);
+ return vectors;
+ }
+ if (vectors < hdev->num_msi)
+ dev_warn(&hdev->pdev->dev,
+ "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+ hdev->num_msi, vectors);
+
+ hdev->num_msi = vectors;
+ hdev->num_msi_left = vectors;
+ hdev->base_msi_vector = pdev->irq;
+
+ hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(u16), GFP_KERNEL);
+ if (!hdev->vector_status) {
+ pci_free_irq_vectors(pdev);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < hdev->num_msi; i++)
+ hdev->vector_status[i] = HCLGEVF_INVALID_VPORT;
+
+ hdev->vector_irq = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(int), GFP_KERNEL);
+ if (!hdev->vector_irq) {
+ pci_free_irq_vectors(pdev);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hclgevf_uninit_msi(struct hclgevf_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+
+ pci_free_irq_vectors(pdev);
+}
+
+static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
+{
+ int ret = 0;
+
+ hclgevf_get_misc_vector(hdev);
+
+ ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
+ 0, "hclgevf_cmd", hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "VF failed to request misc irq(%d)\n",
+ hdev->misc_vector.vector_irq);
+ return ret;
+ }
+
+ /* enable misc. vector(vector 0) */
+ hclgevf_enable_vector(&hdev->misc_vector, true);
+
+ return ret;
+}
+
+static void hclgevf_misc_irq_uninit(struct hclgevf_dev *hdev)
+{
+ /* disable misc vector(vector 0) */
+ hclgevf_enable_vector(&hdev->misc_vector, false);
+ free_irq(hdev->misc_vector.vector_irq, hdev);
+ hclgevf_free_vector(hdev, 0);
+}
+
+static int hclgevf_init_instance(struct hclgevf_dev *hdev,
+ struct hnae3_client *client)
+{
+ int ret;
+
+ switch (client->type) {
+ case HNAE3_CLIENT_KNIC:
+ hdev->nic_client = client;
+ hdev->nic.client = client;
+
+ ret = client->ops->init_instance(&hdev->nic);
+ if (ret)
+ return ret;
+
+ if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) {
+ struct hnae3_client *rc = hdev->roce_client;
+
+ ret = hclgevf_init_roce_base_info(hdev);
+ if (ret)
+ return ret;
+ ret = rc->ops->init_instance(&hdev->roce);
+ if (ret)
+ return ret;
+ }
+ break;
+ case HNAE3_CLIENT_UNIC:
+ hdev->nic_client = client;
+ hdev->nic.client = client;
+
+ ret = client->ops->init_instance(&hdev->nic);
+ if (ret)
+ return ret;
+ break;
+ case HNAE3_CLIENT_ROCE:
+ hdev->roce_client = client;
+ hdev->roce.client = client;
+
+ if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) {
+ ret = hclgevf_init_roce_base_info(hdev);
+ if (ret)
+ return ret;
+
+ ret = client->ops->init_instance(&hdev->roce);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void hclgevf_uninit_instance(struct hclgevf_dev *hdev,
+ struct hnae3_client *client)
+{
+ /* un-init roce, if it exists */
+ if (hdev->roce_client)
+ hdev->roce_client->ops->uninit_instance(&hdev->roce, 0);
+
+ /* un-init nic/unic, if this was not called by roce client */
+ if ((client->ops->uninit_instance) &&
+ (client->type != HNAE3_CLIENT_ROCE))
+ client->ops->uninit_instance(&hdev->nic, 0);
+}
+
+static int hclgevf_register_client(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ return hclgevf_init_instance(hdev, client);
+}
+
+static void hclgevf_unregister_client(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hclgevf_uninit_instance(hdev, client);
+}
+
+static int hclgevf_pci_init(struct hclgevf_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct hclgevf_hw *hw;
+ int ret;
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to enable PCI device\n");
+ goto err_no_drvdata;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_err(&pdev->dev, "can't set consistent PCI DMA, exiting");
+ goto err_disable_device;
+ }
+
+ ret = pci_request_regions(pdev, HCLGEVF_DRIVER_NAME);
+ if (ret) {
+ dev_err(&pdev->dev, "PCI request regions failed %d\n", ret);
+ goto err_disable_device;
+ }
+
+ pci_set_master(pdev);
+ hw = &hdev->hw;
+ hw->hdev = hdev;
+ hw->io_base = pci_iomap(pdev, 2, 0);;
+ if (!hw->io_base) {
+ dev_err(&pdev->dev, "can't map configuration register space\n");
+ ret = -ENOMEM;
+ goto err_clr_master;
+ }
+
+ return 0;
+
+err_clr_master:
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+err_no_drvdata:
+ pci_set_drvdata(pdev, NULL);
+ return ret;
+}
+
+static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+
+ pci_iounmap(pdev, hdev->hw.io_base);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ struct hclgevf_dev *hdev;
+ int ret;
+
+ hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+ if (!hdev)
+ return -ENOMEM;
+
+ hdev->pdev = pdev;
+ hdev->ae_dev = ae_dev;
+ ae_dev->priv = hdev;
+
+ ret = hclgevf_pci_init(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "PCI initialization failed\n");
+ return ret;
+ }
+
+ ret = hclgevf_init_msi(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed(%d) to init MSI/MSI-X\n", ret);
+ goto err_irq_init;
+ }
+
+ hclgevf_state_init(hdev);
+
+ ret = hclgevf_misc_irq_init(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed(%d) to init Misc IRQ(vector0)\n",
+ ret);
+ goto err_misc_irq_init;
+ }
+
+ ret = hclgevf_cmd_init(hdev);
+ if (ret)
+ goto err_cmd_init;
+
+ ret = hclgevf_configure(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed(%d) to fetch configuration\n", ret);
+ goto err_config;
+ }
+
+ ret = hclgevf_alloc_tqps(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed(%d) to allocate TQPs\n", ret);
+ goto err_config;
+ }
+
+ ret = hclgevf_set_handle_info(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed(%d) to set handle info\n", ret);
+ goto err_config;
+ }
+
+ ret = hclgevf_enable_tso(hdev, true);
+ if (ret) {
+ dev_err(&pdev->dev, "failed(%d) to enable tso\n", ret);
+ goto err_config;
+ }
+
+ /* Initialize VF's MTA */
+ hdev->accept_mta_mc = true;
+ ret = hclgevf_cfg_func_mta_filter(&hdev->nic, hdev->accept_mta_mc);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed(%d) to set mta filter mode\n", ret);
+ goto err_config;
+ }
+
+ /* Initialize RSS for this VF */
+ ret = hclgevf_rss_init_hw(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed(%d) to initialize RSS\n", ret);
+ goto err_config;
+ }
+
+ ret = hclgevf_init_vlan_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed(%d) to initialize VLAN config\n", ret);
+ goto err_config;
+ }
+
+ pr_info("finished initializing %s driver\n", HCLGEVF_DRIVER_NAME);
+
+ return 0;
+
+err_config:
+ hclgevf_cmd_uninit(hdev);
+err_cmd_init:
+ hclgevf_misc_irq_uninit(hdev);
+err_misc_irq_init:
+ hclgevf_state_uninit(hdev);
+ hclgevf_uninit_msi(hdev);
+err_irq_init:
+ hclgevf_pci_uninit(hdev);
+ return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hclgevf_cmd_uninit(hdev);
+ hclgevf_misc_irq_uninit(hdev);
+ hclgevf_state_uninit(hdev);
+ hclgevf_uninit_msi(hdev);
+ hclgevf_pci_uninit(hdev);
+ ae_dev->priv = NULL;
+}
+
+static const struct hnae3_ae_ops hclgevf_ops = {
+ .init_ae_dev = hclgevf_init_ae_dev,
+ .uninit_ae_dev = hclgevf_uninit_ae_dev,
+ .init_client_instance = hclgevf_register_client,
+ .uninit_client_instance = hclgevf_unregister_client,
+ .start = hclgevf_ae_start,
+ .stop = hclgevf_ae_stop,
+ .map_ring_to_vector = hclgevf_map_ring_to_vector,
+ .unmap_ring_from_vector = hclgevf_unmap_ring_from_vector,
+ .get_vector = hclgevf_get_vector,
+ .reset_queue = hclgevf_reset_tqp,
+ .set_promisc_mode = hclgevf_set_promisc_mode,
+ .get_mac_addr = hclgevf_get_mac_addr,
+ .set_mac_addr = hclgevf_set_mac_addr,
+ .add_uc_addr = hclgevf_add_uc_addr,
+ .rm_uc_addr = hclgevf_rm_uc_addr,
+ .add_mc_addr = hclgevf_add_mc_addr,
+ .rm_mc_addr = hclgevf_rm_mc_addr,
+ .get_stats = hclgevf_get_stats,
+ .update_stats = hclgevf_update_stats,
+ .get_strings = hclgevf_get_strings,
+ .get_sset_count = hclgevf_get_sset_count,
+ .get_rss_key_size = hclgevf_get_rss_key_size,
+ .get_rss_indir_size = hclgevf_get_rss_indir_size,
+ .get_rss = hclgevf_get_rss,
+ .set_rss = hclgevf_set_rss,
+ .get_tc_size = hclgevf_get_tc_size,
+ .get_fw_version = hclgevf_get_fw_version,
+ .set_vlan_filter = hclgevf_set_vlan_filter,
+};
+
+static struct hnae3_ae_algo ae_algovf = {
+ .ops = &hclgevf_ops,
+ .name = HCLGEVF_NAME,
+ .pdev_id_table = ae_algovf_pci_tbl,
+};
+
+static int hclgevf_init(void)
+{
+ pr_info("%s is initializing\n", HCLGEVF_NAME);
+
+ return hnae3_register_ae_algo(&ae_algovf);
+}
+
+static void hclgevf_exit(void)
+{
+ hnae3_unregister_ae_algo(&ae_algovf);
+}
+module_init(hclgevf_init);
+module_exit(hclgevf_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_DESCRIPTION("HCLGEVF Driver");
+MODULE_VERSION(HCLGEVF_MOD_VERSION);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
new file mode 100644
index 000000000000..a63bee4a3674
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2016-2017 Hisilicon Limited. */
+
+#ifndef __HCLGEVF_MAIN_H
+#define __HCLGEVF_MAIN_H
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "hclge_mbx.h"
+#include "hclgevf_cmd.h"
+#include "hnae3.h"
+
+#define HCLGEVF_MOD_VERSION "v1.0"
+#define HCLGEVF_DRIVER_NAME "hclgevf"
+
+#define HCLGEVF_ROCEE_VECTOR_NUM 0
+#define HCLGEVF_MISC_VECTOR_NUM 0
+
+#define HCLGEVF_INVALID_VPORT 0xffff
+
+/* This number in actual depends upon the total number of VFs
+ * created by physical function. But the maximum number of
+ * possible vector-per-VF is {VFn(1-32), VECTn(32 + 1)}.
+ */
+#define HCLGEVF_MAX_VF_VECTOR_NUM (32 + 1)
+
+#define HCLGEVF_VECTOR_REG_BASE 0x20000
+#define HCLGEVF_MISC_VECTOR_REG_BASE 0x20400
+#define HCLGEVF_VECTOR_REG_OFFSET 0x4
+#define HCLGEVF_VECTOR_VF_OFFSET 0x100000
+
+/* Vector0 interrupt CMDQ event source register(RW) */
+#define HCLGEVF_VECTOR0_CMDQ_SRC_REG 0x27100
+/* CMDQ register bits for RX event(=MBX event) */
+#define HCLGEVF_VECTOR0_RX_CMDQ_INT_B 1
+
+#define HCLGEVF_TQP_RESET_TRY_TIMES 10
+
+#define HCLGEVF_RSS_IND_TBL_SIZE 512
+#define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff
+#define HCLGEVF_RSS_KEY_SIZE 40
+#define HCLGEVF_RSS_HASH_ALGO_TOEPLITZ 0
+#define HCLGEVF_RSS_HASH_ALGO_SIMPLE 1
+#define HCLGEVF_RSS_HASH_ALGO_SYMMETRIC 2
+#define HCLGEVF_RSS_HASH_ALGO_MASK 0xf
+#define HCLGEVF_RSS_CFG_TBL_NUM \
+ (HCLGEVF_RSS_IND_TBL_SIZE / HCLGEVF_RSS_CFG_TBL_SIZE)
+
+/* states of hclgevf device & tasks */
+enum hclgevf_states {
+ /* device states */
+ HCLGEVF_STATE_DOWN,
+ HCLGEVF_STATE_DISABLED,
+ /* task states */
+ HCLGEVF_STATE_SERVICE_SCHED,
+ HCLGEVF_STATE_MBX_SERVICE_SCHED,
+ HCLGEVF_STATE_MBX_HANDLING,
+};
+
+#define HCLGEVF_MPF_ENBALE 1
+
+struct hclgevf_mac {
+ u8 mac_addr[ETH_ALEN];
+ int link;
+};
+
+struct hclgevf_hw {
+ void __iomem *io_base;
+ int num_vec;
+ struct hclgevf_cmq cmq;
+ struct hclgevf_mac mac;
+ void *hdev; /* hchgevf device it is part of */
+};
+
+/* TQP stats */
+struct hlcgevf_tqp_stats {
+ /* query_tqp_tx_queue_statistics ,opcode id: 0x0B03 */
+ u64 rcb_tx_ring_pktnum_rcd; /* 32bit */
+ /* query_tqp_rx_queue_statistics ,opcode id: 0x0B13 */
+ u64 rcb_rx_ring_pktnum_rcd; /* 32bit */
+};
+
+struct hclgevf_tqp {
+ struct device *dev; /* device for DMA mapping */
+ struct hnae3_queue q;
+ struct hlcgevf_tqp_stats tqp_stats;
+ u16 index; /* global index in a NIC controller */
+
+ bool alloced;
+};
+
+struct hclgevf_cfg {
+ u8 vmdq_vport_num;
+ u8 tc_num;
+ u16 tqp_desc_num;
+ u16 rx_buf_len;
+ u8 phy_addr;
+ u8 media_type;
+ u8 mac_addr[ETH_ALEN];
+ u32 numa_node_map;
+};
+
+struct hclgevf_rss_cfg {
+ u8 rss_hash_key[HCLGEVF_RSS_KEY_SIZE]; /* user configured hash keys */
+ u32 hash_algo;
+ u32 rss_size;
+ u8 hw_tc_map;
+ u8 rss_indirection_tbl[HCLGEVF_RSS_IND_TBL_SIZE]; /* shadow table */
+};
+
+struct hclgevf_misc_vector {
+ u8 __iomem *addr;
+ int vector_irq;
+};
+
+struct hclgevf_dev {
+ struct pci_dev *pdev;
+ struct hnae3_ae_dev *ae_dev;
+ struct hclgevf_hw hw;
+ struct hclgevf_misc_vector misc_vector;
+ struct hclgevf_rss_cfg rss_cfg;
+ unsigned long state;
+
+ u32 fw_version;
+ u16 num_tqps; /* num task queue pairs of this PF */
+
+ u16 alloc_rss_size; /* allocated RSS task queue */
+ u16 rss_size_max; /* HW defined max RSS task queue */
+
+ u16 num_alloc_vport; /* num vports this driver supports */
+ u32 numa_node_mask;
+ u16 rx_buf_len;
+ u16 num_desc;
+ u8 hw_tc_map;
+
+ u16 num_msi;
+ u16 num_msi_left;
+ u16 num_msi_used;
+ u32 base_msi_vector;
+ u16 *vector_status;
+ int *vector_irq;
+
+ bool accept_mta_mc; /* whether to accept mta filter multicast */
+ struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+
+ struct timer_list service_timer;
+ struct work_struct service_task;
+ struct work_struct mbx_service_task;
+
+ struct hclgevf_tqp *htqp;
+
+ struct hnae3_handle nic;
+ struct hnae3_handle roce;
+
+ struct hnae3_client *nic_client;
+ struct hnae3_client *roce_client;
+ u32 flag;
+};
+
+int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
+ const u8 *msg_data, u8 msg_len, bool need_resp,
+ u8 *resp_data, u16 resp_len);
+void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
new file mode 100644
index 000000000000..e39cad285fa9
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
+
+#include "hclge_mbx.h"
+#include "hclgevf_main.h"
+#include "hnae3.h"
+
+static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev)
+{
+ /* this function should be called with mbx_resp.mbx_mutex held
+ * to prtect the received_response from race condition
+ */
+ hdev->mbx_resp.received_resp = false;
+ hdev->mbx_resp.origin_mbx_msg = 0;
+ hdev->mbx_resp.resp_status = 0;
+ memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE);
+}
+
+/* hclgevf_get_mbx_resp: used to get a response from PF after VF sends a mailbox
+ * message to PF.
+ * @hdev: pointer to struct hclgevf_dev
+ * @resp_msg: pointer to store the original message type and response status
+ * @len: the resp_msg data array length.
+ */
+static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
+ u8 *resp_data, u16 resp_len)
+{
+#define HCLGEVF_MAX_TRY_TIMES 500
+#define HCLGEVF_SLEEP_USCOEND 1000
+ struct hclgevf_mbx_resp_status *mbx_resp;
+ u16 r_code0, r_code1;
+ int i = 0;
+
+ if (resp_len > HCLGE_MBX_MAX_RESP_DATA_SIZE) {
+ dev_err(&hdev->pdev->dev,
+ "VF mbx response len(=%d) exceeds maximum(=%d)\n",
+ resp_len,
+ HCLGE_MBX_MAX_RESP_DATA_SIZE);
+ return -EINVAL;
+ }
+
+ while ((!hdev->mbx_resp.received_resp) && (i < HCLGEVF_MAX_TRY_TIMES)) {
+ udelay(HCLGEVF_SLEEP_USCOEND);
+ i++;
+ }
+
+ if (i >= HCLGEVF_MAX_TRY_TIMES) {
+ dev_err(&hdev->pdev->dev,
+ "VF could not get mbx resp(=%d) from PF in %d tries\n",
+ hdev->mbx_resp.received_resp, i);
+ return -EIO;
+ }
+
+ mbx_resp = &hdev->mbx_resp;
+ r_code0 = (u16)(mbx_resp->origin_mbx_msg >> 16);
+ r_code1 = (u16)(mbx_resp->origin_mbx_msg & 0xff);
+ if (resp_data)
+ memcpy(resp_data, &mbx_resp->additional_info[0], resp_len);
+
+ hclgevf_reset_mbx_resp_status(hdev);
+
+ if (!(r_code0 == code0 && r_code1 == code1 && !mbx_resp->resp_status)) {
+ dev_err(&hdev->pdev->dev,
+ "VF could not match resp code(code0=%d,code1=%d), %d",
+ code0, code1, mbx_resp->resp_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
+ const u8 *msg_data, u8 msg_len, bool need_resp,
+ u8 *resp_data, u16 resp_len)
+{
+ struct hclge_mbx_vf_to_pf_cmd *req;
+ struct hclgevf_desc desc;
+ int status;
+
+ req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
+
+ /* first two bytes are reserved for code & subcode */
+ if (msg_len > (HCLGE_MBX_MAX_MSG_SIZE - 2)) {
+ dev_err(&hdev->pdev->dev,
+ "VF send mbx msg fail, msg len %d exceeds max len %d\n",
+ msg_len, HCLGE_MBX_MAX_MSG_SIZE);
+ return -EINVAL;
+ }
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
+ req->msg[0] = code;
+ req->msg[1] = subcode;
+ memcpy(&req->msg[2], msg_data, msg_len);
+
+ /* synchronous send */
+ if (need_resp) {
+ mutex_lock(&hdev->mbx_resp.mbx_mutex);
+ hclgevf_reset_mbx_resp_status(hdev);
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to send mbx message to PF\n",
+ status);
+ mutex_unlock(&hdev->mbx_resp.mbx_mutex);
+ return status;
+ }
+
+ status = hclgevf_get_mbx_resp(hdev, code, subcode, resp_data,
+ resp_len);
+ mutex_unlock(&hdev->mbx_resp.mbx_mutex);
+ } else {
+ /* asynchronous send */
+ status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to send mbx message to PF\n",
+ status);
+ return status;
+ }
+ }
+
+ return status;
+}
+
+void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
+{
+ struct hclgevf_mbx_resp_status *resp;
+ struct hclge_mbx_pf_to_vf_cmd *req;
+ struct hclgevf_cmq_ring *crq;
+ struct hclgevf_desc *desc;
+ u16 link_status, flag;
+ u8 *temp;
+ int i;
+
+ resp = &hdev->mbx_resp;
+ crq = &hdev->hw.cmq.crq;
+
+ flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
+ while (hnae_get_bit(flag, HCLGEVF_CMDQ_RX_OUTVLD_B)) {
+ desc = &crq->desc[crq->next_to_use];
+ req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
+
+ switch (req->msg[0]) {
+ case HCLGE_MBX_PF_VF_RESP:
+ if (resp->received_resp)
+ dev_warn(&hdev->pdev->dev,
+ "VF mbx resp flag not clear(%d)\n",
+ req->msg[1]);
+ resp->received_resp = true;
+
+ resp->origin_mbx_msg = (req->msg[1] << 16);
+ resp->origin_mbx_msg |= req->msg[2];
+ resp->resp_status = req->msg[3];
+
+ temp = (u8 *)&req->msg[4];
+ for (i = 0; i < HCLGE_MBX_MAX_RESP_DATA_SIZE; i++) {
+ resp->additional_info[i] = *temp;
+ temp++;
+ }
+ break;
+ case HCLGE_MBX_LINK_STAT_CHANGE:
+ link_status = le16_to_cpu(req->msg[1]);
+
+ /* update upper layer with new link link status */
+ hclgevf_update_link_status(hdev, link_status);
+
+ break;
+ default:
+ dev_err(&hdev->pdev->dev,
+ "VF received unsupported(%d) mbx msg from PF\n",
+ req->msg[0]);
+ break;
+ }
+ hclge_mbx_ring_ptr_move_crq(crq);
+ flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
+ }
+
+ /* Write back CMDQ_RQ header pointer, M7 need this pointer */
+ hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
+ crq->next_to_use);
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 62a18914f00f..7737a05c717c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9101,9 +9101,11 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
/* Redirect to a VF or a offloaded macvlan */
if (is_tcf_mirred_egress_redirect(a)) {
- int ifindex = tcf_mirred_ifindex(a);
+ struct net_device *dev = tcf_mirred_dev(a);
- err = handle_redirect_action(adapter, ifindex, queue,
+ if (!dev)
+ return -EINVAL;
+ err = handle_redirect_action(adapter, dev->ifindex, queue,
action);
if (err == 0)
return err;
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 634b2f41cc9e..a19760736b71 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -454,11 +454,11 @@
/* Various constants */
/* Coalescing */
-#define MVPP2_TXDONE_COAL_PKTS_THRESH 15
+#define MVPP2_TXDONE_COAL_PKTS_THRESH 64
#define MVPP2_TXDONE_HRTIMER_PERIOD_NS 1000000UL
#define MVPP2_TXDONE_COAL_USEC 1000
#define MVPP2_RX_COAL_PKTS 32
-#define MVPP2_RX_COAL_USEC 100
+#define MVPP2_RX_COAL_USEC 64
/* The two bytes Marvell header. Either contains a special value used
* by Marvell switches when a specific hardware mode is enabled (not
@@ -504,10 +504,12 @@
#define MVPP2_DEFAULT_RXQ 4
/* Max number of Rx descriptors */
-#define MVPP2_MAX_RXD 128
+#define MVPP2_MAX_RXD_MAX 1024
+#define MVPP2_MAX_RXD_DFLT 128
/* Max number of Tx descriptors */
-#define MVPP2_MAX_TXD 1024
+#define MVPP2_MAX_TXD_MAX 2048
+#define MVPP2_MAX_TXD_DFLT 1024
/* Amount of Tx descriptors that can be reserved at once by CPU */
#define MVPP2_CPU_DESC_CHUNK 64
@@ -5802,6 +5804,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
txq_pcpu->reserved_num = 0;
txq_pcpu->txq_put_index = 0;
txq_pcpu->txq_get_index = 0;
+ txq_pcpu->tso_headers = NULL;
txq_pcpu->stop_threshold = txq->size - MVPP2_MAX_SKB_DESCS;
txq_pcpu->wake_threshold = txq_pcpu->stop_threshold / 2;
@@ -5829,10 +5832,13 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);
- dma_free_coherent(port->dev->dev.parent,
- txq_pcpu->size * TSO_HEADER_SIZE,
- txq_pcpu->tso_headers,
- txq_pcpu->tso_headers_dma);
+ if (txq_pcpu->tso_headers)
+ dma_free_coherent(port->dev->dev.parent,
+ txq_pcpu->size * TSO_HEADER_SIZE,
+ txq_pcpu->tso_headers,
+ txq_pcpu->tso_headers_dma);
+
+ txq_pcpu->tso_headers = NULL;
}
if (txq->descs)
@@ -6832,13 +6838,13 @@ static int mvpp2_check_ringparam_valid(struct net_device *dev,
if (ring->rx_pending == 0 || ring->tx_pending == 0)
return -EINVAL;
- if (ring->rx_pending > MVPP2_MAX_RXD)
- new_rx_pending = MVPP2_MAX_RXD;
+ if (ring->rx_pending > MVPP2_MAX_RXD_MAX)
+ new_rx_pending = MVPP2_MAX_RXD_MAX;
else if (!IS_ALIGNED(ring->rx_pending, 16))
new_rx_pending = ALIGN(ring->rx_pending, 16);
- if (ring->tx_pending > MVPP2_MAX_TXD)
- new_tx_pending = MVPP2_MAX_TXD;
+ if (ring->tx_pending > MVPP2_MAX_TXD_MAX)
+ new_tx_pending = MVPP2_MAX_TXD_MAX;
else if (!IS_ALIGNED(ring->tx_pending, 32))
new_tx_pending = ALIGN(ring->tx_pending, 32);
@@ -7318,9 +7324,10 @@ static int mvpp2_ethtool_get_coalesce(struct net_device *dev,
{
struct mvpp2_port *port = netdev_priv(dev);
- c->rx_coalesce_usecs = port->rxqs[0]->time_coal;
- c->rx_max_coalesced_frames = port->rxqs[0]->pkts_coal;
- c->tx_max_coalesced_frames = port->txqs[0]->done_pkts_coal;
+ c->rx_coalesce_usecs = port->rxqs[0]->time_coal;
+ c->rx_max_coalesced_frames = port->rxqs[0]->pkts_coal;
+ c->tx_max_coalesced_frames = port->txqs[0]->done_pkts_coal;
+ c->tx_coalesce_usecs = port->tx_time_coal;
return 0;
}
@@ -7340,8 +7347,8 @@ static void mvpp2_ethtool_get_ringparam(struct net_device *dev,
{
struct mvpp2_port *port = netdev_priv(dev);
- ring->rx_max_pending = MVPP2_MAX_RXD;
- ring->tx_max_pending = MVPP2_MAX_TXD;
+ ring->rx_max_pending = MVPP2_MAX_RXD_MAX;
+ ring->tx_max_pending = MVPP2_MAX_TXD_MAX;
ring->rx_pending = port->rx_ring_size;
ring->tx_pending = port->tx_ring_size;
}
@@ -7788,7 +7795,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_netdev;
}
- dev->tx_queue_len = MVPP2_MAX_TXD;
+ dev->tx_queue_len = MVPP2_MAX_TXD_MAX;
dev->watchdog_timeo = 5 * HZ;
dev->netdev_ops = &mvpp2_netdev_ops;
dev->ethtool_ops = &mvpp2_eth_tool_ops;
@@ -7871,8 +7878,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
- port->tx_ring_size = MVPP2_MAX_TXD;
- port->rx_ring_size = MVPP2_MAX_RXD;
+ port->tx_ring_size = MVPP2_MAX_TXD_DFLT;
+ port->rx_ring_size = MVPP2_MAX_RXD_DFLT;
SET_NETDEV_DEV(dev, &pdev->dev);
err = mvpp2_port_init(port);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a3e512..0f5c012de52e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4308,9 +4308,6 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
mlx5e_ipsec_cleanup(priv);
mlx5e_vxlan_cleanup(priv);
-
- if (priv->channels.params.xdp_prog)
- bpf_prog_put(priv->channels.params.xdp_prog);
}
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 55979ec2e88a..3e03d2e8f96a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1982,11 +1982,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
}
if (is_tcf_mirred_egress_redirect(a)) {
- int ifindex = tcf_mirred_ifindex(a);
struct net_device *out_dev;
struct mlx5e_priv *out_priv;
- out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
+ out_dev = tcf_mirred_dev(a);
if (switchdev_port_same_parent_id(priv->netdev,
out_dev)) {
@@ -1996,7 +1995,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
rpriv = out_priv->ppriv;
attr->out_rep = rpriv->rep;
} else if (encap) {
- parse_attr->mirred_ifindex = ifindex;
+ parse_attr->mirred_ifindex = out_dev->ifindex;
parse_attr->tun_info = *info;
attr->parse_attr = parse_attr;
attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 9bd8d28de152..d373df7b11bd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1571,14 +1571,11 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
const struct tc_action *a,
bool ingress)
{
- struct net *net = dev_net(mlxsw_sp_port->dev);
enum mlxsw_sp_span_type span_type;
struct mlxsw_sp_port *to_port;
struct net_device *to_dev;
- int ifindex;
- ifindex = tcf_mirred_ifindex(a);
- to_dev = __dev_get_by_index(net, ifindex);
+ to_dev = tcf_mirred_dev(a);
if (!to_dev) {
netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
return -EINVAL;
@@ -1838,6 +1835,54 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
}
}
+
+static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ if (!enable && (mlxsw_sp_port->acl_rule_count ||
+ !list_empty(&mlxsw_sp_port->mall_tc_list))) {
+ netdev_err(dev, "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+typedef int (*mlxsw_sp_feature_handler)(struct net_device *dev, bool enable);
+
+static int mlxsw_sp_handle_feature(struct net_device *dev,
+ netdev_features_t wanted_features,
+ netdev_features_t feature,
+ mlxsw_sp_feature_handler feature_handler)
+{
+ netdev_features_t changes = wanted_features ^ dev->features;
+ bool enable = !!(wanted_features & feature);
+ int err;
+
+ if (!(changes & feature))
+ return 0;
+
+ err = feature_handler(dev, enable);
+ if (err) {
+ netdev_err(dev, "%s feature %pNF failed, err %d\n",
+ enable ? "Enable" : "Disable", &feature, err);
+ return err;
+ }
+
+ if (enable)
+ dev->features |= feature;
+ else
+ dev->features &= ~feature;
+
+ return 0;
+}
+static int mlxsw_sp_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ return mlxsw_sp_handle_feature(dev, features, NETIF_F_HW_TC,
+ mlxsw_sp_feature_hw_tc);
+}
+
static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_open = mlxsw_sp_port_open,
.ndo_stop = mlxsw_sp_port_stop,
@@ -1852,6 +1897,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid,
.ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid,
.ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name,
+ .ndo_set_features = mlxsw_sp_set_features,
};
static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 432ab9b12b7f..a0adcd886589 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -270,6 +270,7 @@ struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc root_qdisc;
+ unsigned acl_rule_count;
};
static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 2f0e57857ea4..42e8a36b9b95 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -92,7 +92,6 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
} else if (is_tcf_mirred_egress_redirect(a)) {
- int ifindex = tcf_mirred_ifindex(a);
struct net_device *out_dev;
struct mlxsw_sp_fid *fid;
u16 fid_index;
@@ -104,7 +103,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- out_dev = __dev_get_by_index(dev_net(dev), ifindex);
+ out_dev = tcf_mirred_dev(a);
if (out_dev == dev)
out_dev = NULL;
@@ -424,6 +423,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
goto err_rule_add;
mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ mlxsw_sp_port->acl_rule_count++;
return 0;
err_rule_add:
@@ -455,6 +455,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
}
mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ mlxsw_sp_port->acl_rule_count--;
}
int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 24c4408b5734..6e5ef984398b 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -22,6 +22,7 @@ nfp-objs := \
nfp_hwmon.o \
nfp_main.o \
nfp_net_common.o \
+ nfp_net_debugdump.o \
nfp_net_ethtool.o \
nfp_net_main.o \
nfp_net_repr.o \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 995e95410b11..3419ad495962 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016 Netronome Systems, Inc.
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -66,12 +66,6 @@
next2 = nfp_meta_next(next))
static bool
-nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
- return meta->l.next != &nfp_prog->insns;
-}
-
-static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return meta->l.prev != &nfp_prog->insns;
@@ -102,7 +96,7 @@ nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
- u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
+ u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
{
enum cmd_ctx_swap ctx;
u64 insn;
@@ -120,14 +114,15 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
FIELD_PREP(OP_CMD_CNT, size) |
FIELD_PREP(OP_CMD_SIG, sync) |
FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
+ FIELD_PREP(OP_CMD_INDIR, indir) |
FIELD_PREP(OP_CMD_MODE, mode);
nfp_prog_push(nfp_prog, insn);
}
static void
-emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
- u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync)
+emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
+ swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
{
struct nfp_insn_re_regs reg;
int err;
@@ -148,7 +143,22 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
return;
}
- __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
+ __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
+ indir);
+}
+
+static void
+emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
+ swreg lreg, swreg rreg, u8 size, bool sync)
+{
+ emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
+}
+
+static void
+emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
+ swreg lreg, swreg rreg, u8 size, bool sync)
+{
+ emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
}
static void
@@ -230,9 +240,11 @@ emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
return;
}
- __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
- invert, shift, reg.wr_both,
- reg.dst_lmextn, reg.src_lmextn);
+ /* Use reg.dst when destination is No-Dest. */
+ __emit_immed(nfp_prog,
+ swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
+ reg.breg, imm >> 8, width, invert, shift,
+ reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}
static void
@@ -510,6 +522,147 @@ static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}
+/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
+ * result to @dst from low end.
+ */
+static void
+wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
+ u8 offset)
+{
+ enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
+ u8 mask = (1 << field_len) - 1;
+
+ emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
+}
+
+/* NFP has Command Push Pull bus which supports bluk memory operations. */
+static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ bool descending_seq = meta->ldst_gather_len < 0;
+ s16 len = abs(meta->ldst_gather_len);
+ swreg src_base, off;
+ unsigned int i;
+ u8 xfer_num;
+
+ off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+ src_base = reg_a(meta->insn.src_reg * 2);
+ xfer_num = round_up(len, 4) / 4;
+
+ /* Setup PREV_ALU fields to override memory read length. */
+ if (len > 32)
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+
+ /* Memory read from source addr into transfer-in registers. */
+ emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
+ off, xfer_num - 1, true, len > 32);
+
+ /* Move from transfer-in to transfer-out. */
+ for (i = 0; i < xfer_num; i++)
+ wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
+
+ off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
+
+ if (len <= 8) {
+ /* Use single direct_ref write8. */
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
+ true);
+ } else if (len <= 32 && IS_ALIGNED(len, 4)) {
+ /* Use single direct_ref write32. */
+ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
+ true);
+ } else if (len <= 32) {
+ /* Use single indirect_ref write8. */
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
+ emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off,
+ len - 1, true);
+ } else if (IS_ALIGNED(len, 4)) {
+ /* Use single indirect_ref write32. */
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+ emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off,
+ xfer_num - 1, true);
+ } else if (len <= 40) {
+ /* Use one direct_ref write32 to write the first 32-bytes, then
+ * another direct_ref write8 to write the remaining bytes.
+ */
+ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off, 7,
+ true);
+
+ off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
+ imm_b(nfp_prog));
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
+ reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
+ true);
+ } else {
+ /* Use one indirect_ref write32 to write 4-bytes aligned length,
+ * then another direct_ref write8 to write the remaining bytes.
+ */
+ u8 new_off;
+
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
+ emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off,
+ xfer_num - 2, true);
+ new_off = meta->paired_st->off + (xfer_num - 1) * 4;
+ off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
+ xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
+ (len & 0x3) - 1, true);
+ }
+
+ /* TODO: The following extra load is to make sure data flow be identical
+ * before and after we do memory copy optimization.
+ *
+ * The load destination register is not guaranteed to be dead, so we
+ * need to make sure it is loaded with the value the same as before
+ * this transformation.
+ *
+ * These extra loads could be removed once we have accurate register
+ * usage information.
+ */
+ if (descending_seq)
+ xfer_num = 0;
+ else if (BPF_SIZE(meta->insn.code) != BPF_DW)
+ xfer_num = xfer_num - 1;
+ else
+ xfer_num = xfer_num - 2;
+
+ switch (BPF_SIZE(meta->insn.code)) {
+ case BPF_B:
+ wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(xfer_num), 1,
+ IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
+ break;
+ case BPF_H:
+ wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(xfer_num), 2, (len & 3) ^ 2);
+ break;
+ case BPF_W:
+ wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(0));
+ break;
+ case BPF_DW:
+ wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(xfer_num));
+ wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
+ reg_xfer(xfer_num + 1));
+ break;
+ }
+
+ if (BPF_SIZE(meta->insn.code) != BPF_DW)
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
@@ -975,9 +1128,6 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
{
const struct bpf_insn *insn = &meta->insn;
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
insn->src_reg * 2, br_mask, insn->off);
wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
@@ -995,9 +1145,6 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
u8 reg = insn->dst_reg * 2;
swreg tmp_reg;
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
if (!swap)
emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
@@ -1027,9 +1174,6 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
areg = insn->dst_reg * 2;
breg = insn->src_reg * 2;
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
if (swap) {
areg ^= breg;
breg ^= areg;
@@ -1494,6 +1638,9 @@ static int
mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int size)
{
+ if (meta->ldst_gather_len)
+ return nfp_cpp_memcpy(nfp_prog, meta);
+
if (meta->ptr.type == PTR_TO_CTX) {
if (nfp_prog->type == BPF_PROG_TYPE_XDP)
return mem_ldx_xdp(nfp_prog, meta, size);
@@ -1630,8 +1777,6 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- if (meta->insn.off < 0) /* TODO */
- return -EOPNOTSUPP;
emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
return 0;
@@ -1646,9 +1791,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
or1 = reg_a(insn->dst_reg * 2);
or2 = reg_b(insn->dst_reg * 2 + 1);
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
if (imm & ~0U) {
tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
emit_alu(nfp_prog, imm_a(nfp_prog),
@@ -1695,9 +1837,6 @@ static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
u64 imm = insn->imm; /* sign extend */
swreg tmp_reg;
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
if (!imm) {
meta->skip = true;
return 0;
@@ -1726,9 +1865,6 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
u64 imm = insn->imm; /* sign extend */
swreg tmp_reg;
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
if (!imm) {
emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
@@ -1753,9 +1889,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
- if (insn->off < 0) /* TODO */
- return -EOPNOTSUPP;
-
emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
ALU_OP_XOR, reg_b(insn->src_reg * 2));
emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
@@ -1887,17 +2020,22 @@ static void br_set_offset(u64 *instr, u16 offset)
/* --- Assembler logic --- */
static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
{
- struct nfp_insn_meta *meta, *next;
- u32 off, br_idx;
- u32 idx;
+ struct nfp_insn_meta *meta, *jmp_dst;
+ u32 idx, br_idx;
- nfp_for_each_insn_walk2(nfp_prog, meta, next) {
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
if (meta->skip)
continue;
if (BPF_CLASS(meta->insn.code) != BPF_JMP)
continue;
- br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1;
+ if (list_is_last(&meta->l, &nfp_prog->insns))
+ idx = nfp_prog->last_bpf_off;
+ else
+ idx = list_next_entry(meta, l)->off - 1;
+
+ br_idx = nfp_prog_offset_to_index(nfp_prog, idx);
+
if (!nfp_is_br(nfp_prog->prog[br_idx])) {
pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
@@ -1907,23 +2045,14 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
continue;
- /* Find the target offset in assembler realm */
- off = meta->insn.off;
- if (!off) {
- pr_err("Fixup found zero offset!!\n");
+ if (!meta->jmp_dst) {
+ pr_err("Non-exit jump doesn't have destination info recorded!!\n");
return -ELOOP;
}
- while (off && nfp_meta_has_next(nfp_prog, next)) {
- next = nfp_meta_next(next);
- off--;
- }
- if (off) {
- pr_err("Fixup found too large jump!! %d\n", off);
- return -ELOOP;
- }
+ jmp_dst = meta->jmp_dst;
- if (next->skip) {
+ if (jmp_dst->skip) {
pr_err("Branch landing on removed instruction!!\n");
return -ELOOP;
}
@@ -1932,7 +2061,7 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
idx <= br_idx; idx++) {
if (!nfp_is_br(nfp_prog->prog[idx]))
continue;
- br_set_offset(&nfp_prog->prog[idx], next->off);
+ br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
}
}
@@ -2105,6 +2234,8 @@ static int nfp_translate(struct nfp_prog *nfp_prog)
nfp_prog->n_translated++;
}
+ nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
+
nfp_outro(nfp_prog);
if (nfp_prog->error)
return nfp_prog->error;
@@ -2173,6 +2304,9 @@ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
if (next.src_reg || next.dst_reg)
continue;
+ if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
+ continue;
+
meta2->skip = true;
}
}
@@ -2209,17 +2343,258 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
if (next1.imm != 0x20 || next2.imm != 0x20)
continue;
+ if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
+ meta3->flags & FLAG_INSN_IS_JUMP_DST)
+ continue;
+
meta2->skip = true;
meta3->skip = true;
}
}
+/* load/store pair that forms memory copy sould look like the following:
+ *
+ * ld_width R, [addr_src + offset_src]
+ * st_width [addr_dest + offset_dest], R
+ *
+ * The destination register of load and source register of store should
+ * be the same, load and store should also perform at the same width.
+ * If either of addr_src or addr_dest is stack pointer, we don't do the
+ * CPP optimization as stack is modelled by registers on NFP.
+ */
+static bool
+curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
+ struct nfp_insn_meta *st_meta)
+{
+ struct bpf_insn *ld = &ld_meta->insn;
+ struct bpf_insn *st = &st_meta->insn;
+
+ if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
+ return false;
+
+ if (ld_meta->ptr.type != PTR_TO_PACKET)
+ return false;
+
+ if (st_meta->ptr.type != PTR_TO_PACKET)
+ return false;
+
+ if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
+ return false;
+
+ if (ld->dst_reg != st->src_reg)
+ return false;
+
+ /* There is jump to the store insn in this pair. */
+ if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
+ return false;
+
+ return true;
+}
+
+/* Currently, we only support chaining load/store pairs if:
+ *
+ * - Their address base registers are the same.
+ * - Their address offsets are in the same order.
+ * - They operate at the same memory width.
+ * - There is no jump into the middle of them.
+ */
+static bool
+curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
+ struct nfp_insn_meta *st_meta,
+ struct bpf_insn *prev_ld,
+ struct bpf_insn *prev_st)
+{
+ u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
+ struct bpf_insn *ld = &ld_meta->insn;
+ struct bpf_insn *st = &st_meta->insn;
+ s16 prev_ld_off, prev_st_off;
+
+ /* This pair is the start pair. */
+ if (!prev_ld)
+ return true;
+
+ prev_size = BPF_LDST_BYTES(prev_ld);
+ curr_size = BPF_LDST_BYTES(ld);
+ prev_ld_base = prev_ld->src_reg;
+ prev_st_base = prev_st->dst_reg;
+ prev_ld_dst = prev_ld->dst_reg;
+ prev_ld_off = prev_ld->off;
+ prev_st_off = prev_st->off;
+
+ if (ld->dst_reg != prev_ld_dst)
+ return false;
+
+ if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
+ return false;
+
+ if (curr_size != prev_size)
+ return false;
+
+ /* There is jump to the head of this pair. */
+ if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
+ return false;
+
+ /* Both in ascending order. */
+ if (prev_ld_off + prev_size == ld->off &&
+ prev_st_off + prev_size == st->off)
+ return true;
+
+ /* Both in descending order. */
+ if (ld->off + curr_size == prev_ld_off &&
+ st->off + curr_size == prev_st_off)
+ return true;
+
+ return false;
+}
+
+/* Return TRUE if cross memory access happens. Cross memory access means
+ * store area is overlapping with load area that a later load might load
+ * the value from previous store, for this case we can't treat the sequence
+ * as an memory copy.
+ */
+static bool
+cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
+ struct nfp_insn_meta *head_st_meta)
+{
+ s16 head_ld_off, head_st_off, ld_off;
+
+ /* Different pointer types does not overlap. */
+ if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
+ return false;
+
+ /* load and store are both PTR_TO_PACKET, check ID info. */
+ if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
+ return true;
+
+ /* Canonicalize the offsets. Turn all of them against the original
+ * base register.
+ */
+ head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
+ head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
+ ld_off = ld->off + head_ld_meta->ptr.off;
+
+ /* Ascending order cross. */
+ if (ld_off > head_ld_off &&
+ head_ld_off < head_st_off && ld_off >= head_st_off)
+ return true;
+
+ /* Descending order cross. */
+ if (ld_off < head_ld_off &&
+ head_ld_off > head_st_off && ld_off <= head_st_off)
+ return true;
+
+ return false;
+}
+
+/* This pass try to identify the following instructoin sequences.
+ *
+ * load R, [regA + offA]
+ * store [regB + offB], R
+ * load R, [regA + offA + const_imm_A]
+ * store [regB + offB + const_imm_A], R
+ * load R, [regA + offA + 2 * const_imm_A]
+ * store [regB + offB + 2 * const_imm_A], R
+ * ...
+ *
+ * Above sequence is typically generated by compiler when lowering
+ * memcpy. NFP prefer using CPP instructions to accelerate it.
+ */
+static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *head_ld_meta = NULL;
+ struct nfp_insn_meta *head_st_meta = NULL;
+ struct nfp_insn_meta *meta1, *meta2;
+ struct bpf_insn *prev_ld = NULL;
+ struct bpf_insn *prev_st = NULL;
+ u8 count = 0;
+
+ nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+ struct bpf_insn *ld = &meta1->insn;
+ struct bpf_insn *st = &meta2->insn;
+
+ /* Reset record status if any of the following if true:
+ * - The current insn pair is not load/store.
+ * - The load/store pair doesn't chain with previous one.
+ * - The chained load/store pair crossed with previous pair.
+ * - The chained load/store pair has a total size of memory
+ * copy beyond 128 bytes which is the maximum length a
+ * single NFP CPP command can transfer.
+ */
+ if (!curr_pair_is_memcpy(meta1, meta2) ||
+ !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
+ prev_st) ||
+ (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
+ head_st_meta) ||
+ head_ld_meta->ldst_gather_len >= 128))) {
+ if (!count)
+ continue;
+
+ if (count > 1) {
+ s16 prev_ld_off = prev_ld->off;
+ s16 prev_st_off = prev_st->off;
+ s16 head_ld_off = head_ld_meta->insn.off;
+
+ if (prev_ld_off < head_ld_off) {
+ head_ld_meta->insn.off = prev_ld_off;
+ head_st_meta->insn.off = prev_st_off;
+ head_ld_meta->ldst_gather_len =
+ -head_ld_meta->ldst_gather_len;
+ }
+
+ head_ld_meta->paired_st = &head_st_meta->insn;
+ head_st_meta->skip = true;
+ } else {
+ head_ld_meta->ldst_gather_len = 0;
+ }
+
+ /* If the chain is ended by an load/store pair then this
+ * could serve as the new head of the the next chain.
+ */
+ if (curr_pair_is_memcpy(meta1, meta2)) {
+ head_ld_meta = meta1;
+ head_st_meta = meta2;
+ head_ld_meta->ldst_gather_len =
+ BPF_LDST_BYTES(ld);
+ meta1 = nfp_meta_next(meta1);
+ meta2 = nfp_meta_next(meta2);
+ prev_ld = ld;
+ prev_st = st;
+ count = 1;
+ } else {
+ head_ld_meta = NULL;
+ head_st_meta = NULL;
+ prev_ld = NULL;
+ prev_st = NULL;
+ count = 0;
+ }
+
+ continue;
+ }
+
+ if (!head_ld_meta) {
+ head_ld_meta = meta1;
+ head_st_meta = meta2;
+ } else {
+ meta1->skip = true;
+ meta2->skip = true;
+ }
+
+ head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
+ meta1 = nfp_meta_next(meta1);
+ meta2 = nfp_meta_next(meta2);
+ prev_ld = ld;
+ prev_st = st;
+ count++;
+ }
+}
+
static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
nfp_bpf_opt_reg_init(nfp_prog);
nfp_bpf_opt_ld_mask(nfp_prog);
nfp_bpf_opt_ld_shift(nfp_prog);
+ nfp_bpf_opt_ldst_gather(nfp_prog);
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e379b78e86ef..54bfd7846f6d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -82,12 +82,6 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
return nfp_net_ebpf_capable(nn) ? "BPF" : "";
}
-static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
-{
- if (nn->dp.bpf_offload_xdp)
- nfp_bpf_xdp_offload(app, nn, NULL);
-}
-
static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
void *type_data, void *cb_priv)
{
@@ -168,7 +162,6 @@ const struct nfp_app_type app_bpf = {
.extra_cap = nfp_bpf_extra_cap,
.vnic_alloc = nfp_app_nic_vnic_alloc,
- .vnic_free = nfp_bpf_vnic_free,
.setup_tc = nfp_bpf_setup_tc,
.tc_busy = nfp_bpf_tc_busy,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 082a15f6dfb5..5884291ddba5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016 Netronome Systems, Inc.
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -89,23 +89,37 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
#define nfp_meta_next(meta) list_next_entry(meta, l)
#define nfp_meta_prev(meta) list_prev_entry(meta, l)
+#define FLAG_INSN_IS_JUMP_DST BIT(0)
+
/**
* struct nfp_insn_meta - BPF instruction wrapper
* @insn: BPF instruction
* @ptr: pointer type for memory operations
+ * @ldst_gather_len: memcpy length gathered from load/store sequence
+ * @paired_st: the paired store insn at the head of the sequence
* @ptr_not_const: pointer is not always constant
+ * @jmp_dst: destination info for jump instructions
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
+ * @flags: eBPF instruction extra optimization flags
* @skip: skip this instruction (optimized out)
* @double_cb: callback for second part of the instruction
* @l: link on nfp_prog->insns list
*/
struct nfp_insn_meta {
struct bpf_insn insn;
- struct bpf_reg_state ptr;
- bool ptr_not_const;
+ union {
+ struct {
+ struct bpf_reg_state ptr;
+ struct bpf_insn *paired_st;
+ s16 ldst_gather_len;
+ bool ptr_not_const;
+ };
+ struct nfp_insn_meta *jmp_dst;
+ };
unsigned int off;
unsigned short n;
+ unsigned short flags;
bool skip;
instr_cb_t double_cb;
@@ -134,6 +148,16 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
return BPF_MODE(meta->insn.code);
}
+static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
+{
+ return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
+}
+
+static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
+{
+ return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
+}
+
/**
* struct nfp_prog - nfp BPF program
* @prog: machine code
@@ -142,6 +166,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
* @verifier_meta: temporary storage for verifier's insn meta
* @type: BPF program type
* @start_off: address of the first instruction in the memory
+ * @last_bpf_off: address of the last instruction translated from BPF
* @tgt_out: jump target for normal exit
* @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
* @tgt_done: jump target to get the next packet
@@ -160,6 +185,7 @@ struct nfp_prog {
enum bpf_prog_type type;
unsigned int start_off;
+ unsigned int last_bpf_off;
unsigned int tgt_out;
unsigned int tgt_abort;
unsigned int tgt_done;
@@ -189,4 +215,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
+struct nfp_insn_meta *
+nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int insn_idx, unsigned int n_insns);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index bc879aeb62d4..377976ce92dd 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016 Netronome Systems, Inc.
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -55,11 +55,10 @@ static int
nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
unsigned int cnt)
{
+ struct nfp_insn_meta *meta;
unsigned int i;
for (i = 0; i < cnt; i++) {
- struct nfp_insn_meta *meta;
-
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
return -ENOMEM;
@@ -70,6 +69,24 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
list_add_tail(&meta->l, &nfp_prog->insns);
}
+ /* Another pass to record jump information. */
+ list_for_each_entry(meta, &nfp_prog->insns, l) {
+ u64 code = meta->insn.code;
+
+ if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
+ BPF_OP(code) != BPF_CALL) {
+ struct nfp_insn_meta *dst_meta;
+ unsigned short dst_indx;
+
+ dst_indx = meta->n + 1 + meta->insn.off;
+ dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
+ cnt);
+
+ meta->jmp_dst = dst_meta;
+ dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
+ }
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 8d43491ddd6b..d2bf29c90226 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016 Netronome Systems, Inc.
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -40,7 +40,7 @@
#include "main.h"
-static struct nfp_insn_meta *
+struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns)
{
@@ -180,10 +180,10 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (meta->insn.code == (BPF_JMP | BPF_EXIT))
return nfp_bpf_check_exit(nfp_prog, env);
- if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
+ if (is_mbpf_load(meta))
return nfp_bpf_check_ptr(nfp_prog, meta, env,
meta->insn.src_reg);
- if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
+ if (is_mbpf_store(meta))
return nfp_bpf_check_ptr(nfp_prog, meta, env,
meta->insn.dst_reg);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index c1c595f8bb87..ca74c517f626 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -93,13 +93,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
size_t act_size = sizeof(struct nfp_fl_output);
struct net_device *out_dev;
u16 tmp_flags;
- int ifindex;
output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT;
output->head.len_lw = act_size >> NFP_FL_LW_SIZ;
- ifindex = tcf_mirred_ifindex(action);
- out_dev = __dev_get_by_index(dev_net(in_dev), ifindex);
+ out_dev = tcf_mirred_dev(action);
if (!out_dev)
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 66070741d55f..d6b63c8f14da 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -50,9 +50,6 @@
#define NFP_FLOWER_LAYER_CT BIT(6)
#define NFP_FLOWER_LAYER_VXLAN BIT(7)
-#define NFP_FLOWER_LAYER_ETHER BIT(3)
-#define NFP_FLOWER_LAYER_ARP BIT(4)
-
#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13)
#define NFP_FLOWER_MASK_VLAN_CFI BIT(12)
#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0)
@@ -165,20 +162,6 @@ struct nfp_fl_pop_vlan {
__be16 reserved;
};
-/* Metadata without L2 (1W/4B)
- * ----------------------------------------------------------------
- * 3 2 1
- * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | key_layers | mask_id | reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- */
-struct nfp_flower_meta_one {
- u8 nfp_flow_key_layer;
- u8 mask_id;
- u16 reserved;
-};
-
struct nfp_fl_pre_tunnel {
struct nfp_fl_act_head head;
__be16 reserved;
@@ -209,7 +192,7 @@ struct nfp_fl_set_vxlan {
* NOTE: | TCI |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-struct nfp_flower_meta_two {
+struct nfp_flower_meta_tci {
u8 nfp_flow_key_layer;
u8 mask_id;
__be16 tci;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 60614d4f0e22..1f2b879e12d4 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -38,7 +38,7 @@
#include "main.h"
static void
-nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame,
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame,
struct tc_cls_flower_offload *flow, u8 key_type,
bool mask_version)
{
@@ -46,7 +46,7 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame,
struct flow_dissector_key_vlan *flow_vlan;
u16 tmp_tci;
- memset(frame, 0, sizeof(struct nfp_flower_meta_two));
+ memset(frame, 0, sizeof(struct nfp_flower_meta_tci));
/* Populate the metadata frame. */
frame->nfp_flow_key_layer = key_type;
frame->mask_id = ~0;
@@ -67,14 +67,6 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame,
}
}
-static void
-nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
-{
- frame->nfp_flow_key_layer = key_type;
- frame->mask_id = 0;
- frame->reserved = 0;
-}
-
static int
nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
bool mask_version, enum nfp_flower_tun_type tun_type)
@@ -278,49 +270,32 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
ext = nfp_flow->unmasked_data;
msk = nfp_flow->mask_data;
- if (NFP_FLOWER_LAYER_PORT & key_ls->key_layer) {
- /* Populate Exact Metadata. */
- nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)ext,
- flow, key_ls->key_layer, false);
- /* Populate Mask Metadata. */
- nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)msk,
- flow, key_ls->key_layer, true);
- ext += sizeof(struct nfp_flower_meta_two);
- msk += sizeof(struct nfp_flower_meta_two);
-
- /* Populate Exact Port data. */
- err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
- nfp_repr_get_port_id(netdev),
- false, tun_type);
- if (err)
- return err;
-
- /* Populate Mask Port Data. */
- err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
- nfp_repr_get_port_id(netdev),
- true, tun_type);
- if (err)
- return err;
-
- ext += sizeof(struct nfp_flower_in_port);
- msk += sizeof(struct nfp_flower_in_port);
- } else {
- /* Populate Exact Metadata. */
- nfp_flower_compile_meta((struct nfp_flower_meta_one *)ext,
- key_ls->key_layer);
- /* Populate Mask Metadata. */
- nfp_flower_compile_meta((struct nfp_flower_meta_one *)msk,
- key_ls->key_layer);
- ext += sizeof(struct nfp_flower_meta_one);
- msk += sizeof(struct nfp_flower_meta_one);
- }
- if (NFP_FLOWER_LAYER_META & key_ls->key_layer) {
- /* Additional Metadata Fields.
- * Currently unsupported.
- */
- return -EOPNOTSUPP;
- }
+ /* Populate Exact Metadata. */
+ nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext,
+ flow, key_ls->key_layer, false);
+ /* Populate Mask Metadata. */
+ nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)msk,
+ flow, key_ls->key_layer, true);
+ ext += sizeof(struct nfp_flower_meta_tci);
+ msk += sizeof(struct nfp_flower_meta_tci);
+
+ /* Populate Exact Port data. */
+ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
+ nfp_repr_get_port_id(netdev),
+ false, tun_type);
+ if (err)
+ return err;
+
+ /* Populate Mask Port Data. */
+ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
+ nfp_repr_get_port_id(netdev),
+ true, tun_type);
+ if (err)
+ return err;
+
+ ext += sizeof(struct nfp_flower_in_port);
+ msk += sizeof(struct nfp_flower_in_port);
if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
/* Populate Exact MAC Data. */
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 553f94f55dce..98fb1cba3ed9 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -150,10 +150,15 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
return -EOPNOTSUPP;
key_layer_two = 0;
- key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
- key_size = sizeof(struct nfp_flower_meta_one) +
- sizeof(struct nfp_flower_in_port) +
- sizeof(struct nfp_flower_mac_mpls);
+ key_layer = NFP_FLOWER_LAYER_PORT;
+ key_size = sizeof(struct nfp_flower_meta_tci) +
+ sizeof(struct nfp_flower_in_port);
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS) ||
+ dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+ key_layer |= NFP_FLOWER_LAYER_MAC;
+ key_size += sizeof(struct nfp_flower_mac_mpls);
+ }
if (dissector_uses_key(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 830f6de25f47..d3610987fb07 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -41,6 +41,7 @@
const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
[CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 },
+ [CMD_TGT_WRITE32_SWAP] = { 0x02, 0x5f },
[CMD_TGT_READ8] = { 0x01, 0x43 },
[CMD_TGT_READ32] = { 0x00, 0x5c },
[CMD_TGT_READ32_LE] = { 0x01, 0x5c },
@@ -120,7 +121,8 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
reg->dst = nfp_swreg_to_unreg(dst, true);
/* Decode source operands */
- if (swreg_type(lreg) == swreg_type(rreg))
+ if (swreg_type(lreg) == swreg_type(rreg) &&
+ swreg_type(lreg) != NN_REG_NONE)
return -EFAULT;
if (swreg_type(lreg) == NN_REG_GPR_B ||
@@ -200,7 +202,8 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
/* Decode source operands */
- if (swreg_type(lreg) == swreg_type(rreg))
+ if (swreg_type(lreg) == swreg_type(rreg) &&
+ swreg_type(lreg) != NN_REG_NONE)
return -EFAULT;
if (swreg_type(lreg) == NN_REG_GPR_B ||
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 74d0c11ab2f9..3387e6926eb0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016 Netronome Systems, Inc.
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -209,6 +209,7 @@ enum alu_dst_ab {
#define OP_CMD_CNT 0x0000e000000ULL
#define OP_CMD_SIG 0x000f0000000ULL
#define OP_CMD_TGT_CMD 0x07f00000000ULL
+#define OP_CMD_INDIR 0x20000000000ULL
#define OP_CMD_MODE 0x1c0000000000ULL
struct cmd_tgt_act {
@@ -219,6 +220,7 @@ struct cmd_tgt_act {
enum cmd_tgt_map {
CMD_TGT_READ8,
CMD_TGT_WRITE8_SWAP,
+ CMD_TGT_WRITE32_SWAP,
CMD_TGT_READ32,
CMD_TGT_READ32_LE,
CMD_TGT_READ32_SWAP,
@@ -240,6 +242,9 @@ enum cmd_ctx_swap {
CMD_CTX_NO_SWAP = 3,
};
+#define CMD_OVE_LEN BIT(7)
+#define CMD_OV_LEN GENMASK(12, 8)
+
#define OP_LCSR_BASE 0x0fc00000000ULL
#define OP_LCSR_A_SRC 0x000000003ffULL
#define OP_LCSR_B_SRC 0x000000ffc00ULL
@@ -257,6 +262,7 @@ enum lcsr_wr_src {
#define OP_CARB_BASE 0x0e000000000ULL
#define OP_CARB_OR 0x00000010000ULL
+#define NFP_CSR_CTX_PTR 0x20
#define NFP_CSR_ACT_LM_ADDR0 0x64
#define NFP_CSR_ACT_LM_ADDR1 0x6c
#define NFP_CSR_ACT_LM_ADDR2 0x94
@@ -377,4 +383,13 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
int nfp_ustore_check_valid_no_ecc(u64 insn);
u64 nfp_ustore_calc_ecc_insn(u64 insn);
+#define NFP_IND_ME_REFL_WR_SIG_INIT 3
+#define NFP_IND_ME_CTX_PTR_BASE_MASK GENMASK(9, 0)
+#define NFP_IND_NUM_CONTEXTS 8
+
+static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset)
+{
+ return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR;
+}
+
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 35eaccbece36..0953fa8f3109 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -45,6 +45,7 @@
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/vermagic.h>
+#include <linux/vmalloc.h>
#include <net/devlink.h>
#include "nfpcore/nfp.h"
@@ -509,6 +510,9 @@ static int nfp_pci_probe(struct pci_dev *pdev,
pf->mip = nfp_mip_open(pf->cpp);
pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip);
+ pf->dump_flag = NFP_DUMP_NSP_DIAG;
+ pf->dumpspec = nfp_net_dump_load_dumpspec(pf->cpp, pf->rtbl);
+
err = nfp_pcie_sriov_read_nfd_limit(pf);
if (err)
goto err_fw_unload;
@@ -544,6 +548,7 @@ err_fw_unload:
nfp_fw_unload(pf);
kfree(pf->eth_tbl);
kfree(pf->nspi);
+ vfree(pf->dumpspec);
err_devlink_unreg:
devlink_unregister(devlink);
err_hwinfo_free:
@@ -579,6 +584,7 @@ static void nfp_pci_remove(struct pci_dev *pdev)
devlink_unregister(devlink);
+ vfree(pf->dumpspec);
kfree(pf->rtbl);
nfp_mip_close(pf->mip);
if (pf->fw_loaded)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index be0ee59f2eb9..add46e28212b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -39,6 +39,7 @@
#ifndef NFP_MAIN_H
#define NFP_MAIN_H
+#include <linux/ethtool.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/msi.h>
@@ -62,6 +63,17 @@ struct nfp_port;
struct nfp_rtsym_table;
/**
+ * struct nfp_dumpspec - NFP FW dump specification structure
+ * @size: Size of the data
+ * @data: Sequence of TLVs, each being an instruction to dump some data
+ * from FW
+ */
+struct nfp_dumpspec {
+ u32 size;
+ u8 data[0];
+};
+
+/**
* struct nfp_pf - NFP PF-specific device structure
* @pdev: Backpointer to PCI device
* @cpp: Pointer to the CPP handle
@@ -83,6 +95,9 @@ struct nfp_rtsym_table;
* @mip: MIP handle
* @rtbl: RTsym table
* @hwinfo: HWInfo table
+ * @dumpspec: Debug dump specification
+ * @dump_flag: Store dump flag between set_dump and get_dump_flag
+ * @dump_len: Store dump length between set_dump and get_dump_flag
* @eth_tbl: NSP ETH table
* @nspi: NSP identification info
* @hwmon_dev: pointer to hwmon device
@@ -124,6 +139,9 @@ struct nfp_pf {
const struct nfp_mip *mip;
struct nfp_rtsym_table *rtbl;
struct nfp_hwinfo *hwinfo;
+ struct nfp_dumpspec *dumpspec;
+ u32 dump_flag;
+ u32 dump_len;
struct nfp_eth_table *eth_tbl;
struct nfp_nsp_identify *nspi;
@@ -157,4 +175,15 @@ void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port);
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
+enum nfp_dump_diag {
+ NFP_DUMP_NSP_DIAG = 0,
+};
+
+struct nfp_dumpspec *
+nfp_net_dump_load_dumpspec(struct nfp_cpp *cpp, struct nfp_rtsym_table *rtbl);
+s64 nfp_net_dump_calculate_size(struct nfp_pf *pf, struct nfp_dumpspec *spec,
+ u32 flag);
+int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec,
+ struct ethtool_dump *dump_param, void *dest);
+
#endif /* NFP_MAIN_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 7f9857c276b1..3801c52098d5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -548,6 +548,8 @@ struct nfp_net_dp {
* @max_r_vecs: Number of allocated interrupt vectors for RX/TX
* @max_tx_rings: Maximum number of TX rings supported by the Firmware
* @max_rx_rings: Maximum number of RX rings supported by the Firmware
+ * @stride_rx: Queue controller RX queue spacing
+ * @stride_tx: Queue controller TX queue spacing
* @r_vecs: Pre-allocated array of ring vectors
* @irq_entries: Pre-allocated array of MSI-X entries
* @lsc_handler: Handler for Link State Change interrupt
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 1a603fdd9e80..ad3e9f6a61e5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3392,6 +3392,7 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
if (nn->dp.bpf_offload_xdp)
xdp->prog_attached = XDP_ATTACHED_HW;
xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
+ xdp->flags = nn->xdp_prog ? nn->xdp_flags : 0;
return 0;
case BPF_OFFLOAD_VERIFIER_PREP:
return nfp_app_bpf_verifier_prep(nn->app, nn, xdp);
@@ -3561,9 +3562,6 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev,
*/
void nfp_net_free(struct nfp_net *nn)
{
- if (nn->xdp_prog)
- bpf_prog_put(nn->xdp_prog);
-
if (nn->dp.netdev)
free_netdev(nn->dp.netdev);
else
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
new file mode 100644
index 000000000000..173646e17e94
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+
+#include "nfp_asm.h"
+#include "nfp_main.h"
+#include "nfpcore/nfp.h"
+#include "nfpcore/nfp_nffw.h"
+#include "nfpcore/nfp6000/nfp6000.h"
+
+#define NFP_DUMP_SPEC_RTSYM "_abi_dump_spec"
+
+#define ALIGN8(x) ALIGN(x, 8)
+
+enum nfp_dumpspec_type {
+ NFP_DUMPSPEC_TYPE_CPP_CSR = 0,
+ NFP_DUMPSPEC_TYPE_XPB_CSR = 1,
+ NFP_DUMPSPEC_TYPE_ME_CSR = 2,
+ NFP_DUMPSPEC_TYPE_INDIRECT_ME_CSR = 3,
+ NFP_DUMPSPEC_TYPE_RTSYM = 4,
+ NFP_DUMPSPEC_TYPE_HWINFO = 5,
+ NFP_DUMPSPEC_TYPE_FWNAME = 6,
+ NFP_DUMPSPEC_TYPE_HWINFO_FIELD = 7,
+ NFP_DUMPSPEC_TYPE_PROLOG = 10000,
+ NFP_DUMPSPEC_TYPE_ERROR = 10001,
+};
+
+/* The following structs must be carefully aligned so that they can be used to
+ * interpret the binary dumpspec and populate the dump data in a deterministic
+ * way.
+ */
+
+/* generic type plus length */
+struct nfp_dump_tl {
+ __be32 type;
+ __be32 length; /* chunk length to follow, aligned to 8 bytes */
+ char data[0];
+};
+
+/* NFP CPP parameters */
+struct nfp_dumpspec_cpp_isl_id {
+ u8 target;
+ u8 action;
+ u8 token;
+ u8 island;
+};
+
+struct nfp_dump_common_cpp {
+ struct nfp_dumpspec_cpp_isl_id cpp_id;
+ __be32 offset; /* address to start dump */
+ __be32 dump_length; /* total bytes to dump, aligned to reg size */
+};
+
+/* CSR dumpables */
+struct nfp_dumpspec_csr {
+ struct nfp_dump_tl tl;
+ struct nfp_dump_common_cpp cpp;
+ __be32 register_width; /* in bits */
+};
+
+struct nfp_dumpspec_rtsym {
+ struct nfp_dump_tl tl;
+ char rtsym[0];
+};
+
+/* header for register dumpable */
+struct nfp_dump_csr {
+ struct nfp_dump_tl tl;
+ struct nfp_dump_common_cpp cpp;
+ __be32 register_width; /* in bits */
+ __be32 error; /* error code encountered while reading */
+ __be32 error_offset; /* offset being read when error occurred */
+};
+
+struct nfp_dump_rtsym {
+ struct nfp_dump_tl tl;
+ struct nfp_dump_common_cpp cpp;
+ __be32 error; /* error code encountered while reading */
+ u8 padded_name_length; /* pad so data starts at 8 byte boundary */
+ char rtsym[0];
+ /* after padded_name_length, there is dump_length data */
+};
+
+struct nfp_dump_prolog {
+ struct nfp_dump_tl tl;
+ __be32 dump_level;
+};
+
+struct nfp_dump_error {
+ struct nfp_dump_tl tl;
+ __be32 error;
+ char padding[4];
+ char spec[0];
+};
+
+/* to track state through debug size calculation TLV traversal */
+struct nfp_level_size {
+ __be32 requested_level; /* input */
+ u32 total_size; /* output */
+};
+
+/* to track state during debug dump creation TLV traversal */
+struct nfp_dump_state {
+ __be32 requested_level; /* input param */
+ u32 dumped_size; /* adds up to size of dumped data */
+ u32 buf_size; /* size of buffer pointer to by p */
+ void *p; /* current point in dump buffer */
+};
+
+typedef int (*nfp_tlv_visit)(struct nfp_pf *pf, struct nfp_dump_tl *tl,
+ void *param);
+
+static int
+nfp_traverse_tlvs(struct nfp_pf *pf, void *data, u32 data_length, void *param,
+ nfp_tlv_visit tlv_visit)
+{
+ long long remaining = data_length;
+ struct nfp_dump_tl *tl;
+ u32 total_tlv_size;
+ void *p = data;
+ int err;
+
+ while (remaining >= sizeof(*tl)) {
+ tl = p;
+ if (!tl->type && !tl->length)
+ break;
+
+ if (be32_to_cpu(tl->length) > remaining - sizeof(*tl))
+ return -EINVAL;
+
+ total_tlv_size = sizeof(*tl) + be32_to_cpu(tl->length);
+
+ /* Spec TLVs should be aligned to 4 bytes. */
+ if (total_tlv_size % 4 != 0)
+ return -EINVAL;
+
+ p += total_tlv_size;
+ remaining -= total_tlv_size;
+ err = tlv_visit(pf, tl, param);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static u32 nfp_get_numeric_cpp_id(struct nfp_dumpspec_cpp_isl_id *cpp_id)
+{
+ return NFP_CPP_ISLAND_ID(cpp_id->target, cpp_id->action, cpp_id->token,
+ cpp_id->island);
+}
+
+struct nfp_dumpspec *
+nfp_net_dump_load_dumpspec(struct nfp_cpp *cpp, struct nfp_rtsym_table *rtbl)
+{
+ const struct nfp_rtsym *specsym;
+ struct nfp_dumpspec *dumpspec;
+ int bytes_read;
+ u32 cpp_id;
+
+ specsym = nfp_rtsym_lookup(rtbl, NFP_DUMP_SPEC_RTSYM);
+ if (!specsym)
+ return NULL;
+
+ /* expected size of this buffer is in the order of tens of kilobytes */
+ dumpspec = vmalloc(sizeof(*dumpspec) + specsym->size);
+ if (!dumpspec)
+ return NULL;
+
+ dumpspec->size = specsym->size;
+
+ cpp_id = NFP_CPP_ISLAND_ID(specsym->target, NFP_CPP_ACTION_RW, 0,
+ specsym->domain);
+
+ bytes_read = nfp_cpp_read(cpp, cpp_id, specsym->addr, dumpspec->data,
+ specsym->size);
+ if (bytes_read != specsym->size) {
+ vfree(dumpspec);
+ nfp_warn(cpp, "Debug dump specification read failed.\n");
+ return NULL;
+ }
+
+ return dumpspec;
+}
+
+static int nfp_dump_error_tlv_size(struct nfp_dump_tl *spec)
+{
+ return ALIGN8(sizeof(struct nfp_dump_error) + sizeof(*spec) +
+ be32_to_cpu(spec->length));
+}
+
+static int nfp_calc_fwname_tlv_size(struct nfp_pf *pf)
+{
+ u32 fwname_len = strlen(nfp_mip_name(pf->mip));
+
+ return sizeof(struct nfp_dump_tl) + ALIGN8(fwname_len + 1);
+}
+
+static int nfp_calc_hwinfo_field_sz(struct nfp_pf *pf, struct nfp_dump_tl *spec)
+{
+ u32 tl_len, key_len;
+ const char *value;
+
+ tl_len = be32_to_cpu(spec->length);
+ key_len = strnlen(spec->data, tl_len);
+ if (key_len == tl_len)
+ return nfp_dump_error_tlv_size(spec);
+
+ value = nfp_hwinfo_lookup(pf->hwinfo, spec->data);
+ if (!value)
+ return nfp_dump_error_tlv_size(spec);
+
+ return sizeof(struct nfp_dump_tl) + ALIGN8(key_len + strlen(value) + 2);
+}
+
+static bool nfp_csr_spec_valid(struct nfp_dumpspec_csr *spec_csr)
+{
+ u32 required_read_sz = sizeof(*spec_csr) - sizeof(spec_csr->tl);
+ u32 available_sz = be32_to_cpu(spec_csr->tl.length);
+ u32 reg_width;
+
+ if (available_sz < required_read_sz)
+ return false;
+
+ reg_width = be32_to_cpu(spec_csr->register_width);
+
+ return reg_width == 32 || reg_width == 64;
+}
+
+static int
+nfp_calc_rtsym_dump_sz(struct nfp_pf *pf, struct nfp_dump_tl *spec)
+{
+ struct nfp_rtsym_table *rtbl = pf->rtbl;
+ struct nfp_dumpspec_rtsym *spec_rtsym;
+ const struct nfp_rtsym *sym;
+ u32 tl_len, key_len;
+ u32 size;
+
+ spec_rtsym = (struct nfp_dumpspec_rtsym *)spec;
+ tl_len = be32_to_cpu(spec->length);
+ key_len = strnlen(spec_rtsym->rtsym, tl_len);
+ if (key_len == tl_len)
+ return nfp_dump_error_tlv_size(spec);
+
+ sym = nfp_rtsym_lookup(rtbl, spec_rtsym->rtsym);
+ if (!sym)
+ return nfp_dump_error_tlv_size(spec);
+
+ if (sym->type == NFP_RTSYM_TYPE_ABS)
+ size = sizeof(sym->addr);
+ else
+ size = sym->size;
+
+ return ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1) +
+ ALIGN8(size);
+}
+
+static int
+nfp_add_tlv_size(struct nfp_pf *pf, struct nfp_dump_tl *tl, void *param)
+{
+ struct nfp_dumpspec_csr *spec_csr;
+ u32 *size = param;
+ u32 hwinfo_size;
+
+ switch (be32_to_cpu(tl->type)) {
+ case NFP_DUMPSPEC_TYPE_FWNAME:
+ *size += nfp_calc_fwname_tlv_size(pf);
+ break;
+ case NFP_DUMPSPEC_TYPE_CPP_CSR:
+ case NFP_DUMPSPEC_TYPE_XPB_CSR:
+ case NFP_DUMPSPEC_TYPE_ME_CSR:
+ spec_csr = (struct nfp_dumpspec_csr *)tl;
+ if (!nfp_csr_spec_valid(spec_csr))
+ *size += nfp_dump_error_tlv_size(tl);
+ else
+ *size += ALIGN8(sizeof(struct nfp_dump_csr)) +
+ ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length));
+ break;
+ case NFP_DUMPSPEC_TYPE_INDIRECT_ME_CSR:
+ spec_csr = (struct nfp_dumpspec_csr *)tl;
+ if (!nfp_csr_spec_valid(spec_csr))
+ *size += nfp_dump_error_tlv_size(tl);
+ else
+ *size += ALIGN8(sizeof(struct nfp_dump_csr)) +
+ ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length) *
+ NFP_IND_NUM_CONTEXTS);
+ break;
+ case NFP_DUMPSPEC_TYPE_RTSYM:
+ *size += nfp_calc_rtsym_dump_sz(pf, tl);
+ break;
+ case NFP_DUMPSPEC_TYPE_HWINFO:
+ hwinfo_size = nfp_hwinfo_get_packed_str_size(pf->hwinfo);
+ *size += sizeof(struct nfp_dump_tl) + ALIGN8(hwinfo_size);
+ break;
+ case NFP_DUMPSPEC_TYPE_HWINFO_FIELD:
+ *size += nfp_calc_hwinfo_field_sz(pf, tl);
+ break;
+ default:
+ *size += nfp_dump_error_tlv_size(tl);
+ break;
+ }
+
+ return 0;
+}
+
+static int
+nfp_calc_specific_level_size(struct nfp_pf *pf, struct nfp_dump_tl *dump_level,
+ void *param)
+{
+ struct nfp_level_size *lev_sz = param;
+
+ if (dump_level->type != lev_sz->requested_level)
+ return 0;
+
+ return nfp_traverse_tlvs(pf, dump_level->data,
+ be32_to_cpu(dump_level->length),
+ &lev_sz->total_size, nfp_add_tlv_size);
+}
+
+s64 nfp_net_dump_calculate_size(struct nfp_pf *pf, struct nfp_dumpspec *spec,
+ u32 flag)
+{
+ struct nfp_level_size lev_sz;
+ int err;
+
+ lev_sz.requested_level = cpu_to_be32(flag);
+ lev_sz.total_size = ALIGN8(sizeof(struct nfp_dump_prolog));
+
+ err = nfp_traverse_tlvs(pf, spec->data, spec->size, &lev_sz,
+ nfp_calc_specific_level_size);
+ if (err)
+ return err;
+
+ return lev_sz.total_size;
+}
+
+static int nfp_add_tlv(u32 type, u32 total_tlv_sz, struct nfp_dump_state *dump)
+{
+ struct nfp_dump_tl *tl = dump->p;
+
+ if (total_tlv_sz > dump->buf_size)
+ return -ENOSPC;
+
+ if (dump->buf_size - total_tlv_sz < dump->dumped_size)
+ return -ENOSPC;
+
+ tl->type = cpu_to_be32(type);
+ tl->length = cpu_to_be32(total_tlv_sz - sizeof(*tl));
+
+ dump->dumped_size += total_tlv_sz;
+ dump->p += total_tlv_sz;
+
+ return 0;
+}
+
+static int
+nfp_dump_error_tlv(struct nfp_dump_tl *spec, int error,
+ struct nfp_dump_state *dump)
+{
+ struct nfp_dump_error *dump_header = dump->p;
+ u32 total_spec_size, total_size;
+ int err;
+
+ total_spec_size = sizeof(*spec) + be32_to_cpu(spec->length);
+ total_size = ALIGN8(sizeof(*dump_header) + total_spec_size);
+
+ err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_ERROR, total_size, dump);
+ if (err)
+ return err;
+
+ dump_header->error = cpu_to_be32(error);
+ memcpy(dump_header->spec, spec, total_spec_size);
+
+ return 0;
+}
+
+static int nfp_dump_fwname(struct nfp_pf *pf, struct nfp_dump_state *dump)
+{
+ struct nfp_dump_tl *dump_header = dump->p;
+ u32 fwname_len, total_size;
+ const char *fwname;
+ int err;
+
+ fwname = nfp_mip_name(pf->mip);
+ fwname_len = strlen(fwname);
+ total_size = sizeof(*dump_header) + ALIGN8(fwname_len + 1);
+
+ err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_FWNAME, total_size, dump);
+ if (err)
+ return err;
+
+ memcpy(dump_header->data, fwname, fwname_len);
+
+ return 0;
+}
+
+static int
+nfp_dump_hwinfo(struct nfp_pf *pf, struct nfp_dump_tl *spec,
+ struct nfp_dump_state *dump)
+{
+ struct nfp_dump_tl *dump_header = dump->p;
+ u32 hwinfo_size, total_size;
+ char *hwinfo;
+ int err;
+
+ hwinfo = nfp_hwinfo_get_packed_strings(pf->hwinfo);
+ hwinfo_size = nfp_hwinfo_get_packed_str_size(pf->hwinfo);
+ total_size = sizeof(*dump_header) + ALIGN8(hwinfo_size);
+
+ err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_HWINFO, total_size, dump);
+ if (err)
+ return err;
+
+ memcpy(dump_header->data, hwinfo, hwinfo_size);
+
+ return 0;
+}
+
+static int nfp_dump_hwinfo_field(struct nfp_pf *pf, struct nfp_dump_tl *spec,
+ struct nfp_dump_state *dump)
+{
+ struct nfp_dump_tl *dump_header = dump->p;
+ u32 tl_len, key_len, val_len;
+ const char *key, *value;
+ u32 total_size;
+ int err;
+
+ tl_len = be32_to_cpu(spec->length);
+ key_len = strnlen(spec->data, tl_len);
+ if (key_len == tl_len)
+ return nfp_dump_error_tlv(spec, -EINVAL, dump);
+
+ key = spec->data;
+ value = nfp_hwinfo_lookup(pf->hwinfo, key);
+ if (!value)
+ return nfp_dump_error_tlv(spec, -ENOENT, dump);
+
+ val_len = strlen(value);
+ total_size = sizeof(*dump_header) + ALIGN8(key_len + val_len + 2);
+ err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_HWINFO_FIELD, total_size, dump);
+ if (err)
+ return err;
+
+ memcpy(dump_header->data, key, key_len + 1);
+ memcpy(dump_header->data + key_len + 1, value, val_len + 1);
+
+ return 0;
+}
+
+static bool is_xpb_read(struct nfp_dumpspec_cpp_isl_id *cpp_id)
+{
+ return cpp_id->target == NFP_CPP_TARGET_ISLAND_XPB &&
+ cpp_id->action == 0 && cpp_id->token == 0;
+}
+
+static int
+nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr,
+ struct nfp_dump_state *dump)
+{
+ struct nfp_dump_csr *dump_header = dump->p;
+ u32 reg_sz, header_size, total_size;
+ u32 cpp_rd_addr, max_rd_addr;
+ int bytes_read;
+ void *dest;
+ u32 cpp_id;
+ int err;
+
+ if (!nfp_csr_spec_valid(spec_csr))
+ return nfp_dump_error_tlv(&spec_csr->tl, -EINVAL, dump);
+
+ reg_sz = be32_to_cpu(spec_csr->register_width) / BITS_PER_BYTE;
+ header_size = ALIGN8(sizeof(*dump_header));
+ total_size = header_size +
+ ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length));
+ dest = dump->p + header_size;
+
+ err = nfp_add_tlv(be32_to_cpu(spec_csr->tl.type), total_size, dump);
+ if (err)
+ return err;
+
+ dump_header->cpp = spec_csr->cpp;
+ dump_header->register_width = spec_csr->register_width;
+
+ cpp_id = nfp_get_numeric_cpp_id(&spec_csr->cpp.cpp_id);
+ cpp_rd_addr = be32_to_cpu(spec_csr->cpp.offset);
+ max_rd_addr = cpp_rd_addr + be32_to_cpu(spec_csr->cpp.dump_length);
+
+ while (cpp_rd_addr < max_rd_addr) {
+ if (is_xpb_read(&spec_csr->cpp.cpp_id))
+ bytes_read = nfp_xpb_readl(pf->cpp, cpp_rd_addr,
+ (u32 *)dest);
+ else
+ bytes_read = nfp_cpp_read(pf->cpp, cpp_id, cpp_rd_addr,
+ dest, reg_sz);
+ if (bytes_read != reg_sz) {
+ if (bytes_read >= 0)
+ bytes_read = -EIO;
+ dump_header->error = cpu_to_be32(bytes_read);
+ dump_header->error_offset = cpu_to_be32(cpp_rd_addr);
+ break;
+ }
+ cpp_rd_addr += reg_sz;
+ dest += reg_sz;
+ }
+
+ return 0;
+}
+
+/* Write context to CSRCtxPtr, then read from it. Then the value can be read
+ * from IndCtxStatus.
+ */
+static int
+nfp_read_indirect_csr(struct nfp_cpp *cpp,
+ struct nfp_dumpspec_cpp_isl_id cpp_params, u32 offset,
+ u32 reg_sz, u32 context, void *dest)
+{
+ u32 csr_ctx_ptr_offs;
+ u32 cpp_id;
+ int result;
+
+ csr_ctx_ptr_offs = nfp_get_ind_csr_ctx_ptr_offs(offset);
+ cpp_id = NFP_CPP_ISLAND_ID(cpp_params.target,
+ NFP_IND_ME_REFL_WR_SIG_INIT,
+ cpp_params.token, cpp_params.island);
+ result = nfp_cpp_writel(cpp, cpp_id, csr_ctx_ptr_offs, context);
+ if (result != sizeof(context))
+ return result < 0 ? result : -EIO;
+
+ cpp_id = nfp_get_numeric_cpp_id(&cpp_params);
+ result = nfp_cpp_read(cpp, cpp_id, csr_ctx_ptr_offs, dest, reg_sz);
+ if (result != reg_sz)
+ return result < 0 ? result : -EIO;
+
+ result = nfp_cpp_read(cpp, cpp_id, offset, dest, reg_sz);
+ if (result != reg_sz)
+ return result < 0 ? result : -EIO;
+
+ return 0;
+}
+
+static int
+nfp_read_all_indirect_csr_ctx(struct nfp_cpp *cpp,
+ struct nfp_dumpspec_csr *spec_csr, u32 address,
+ u32 reg_sz, void *dest)
+{
+ u32 ctx;
+ int err;
+
+ for (ctx = 0; ctx < NFP_IND_NUM_CONTEXTS; ctx++) {
+ err = nfp_read_indirect_csr(cpp, spec_csr->cpp.cpp_id, address,
+ reg_sz, ctx, dest + ctx * reg_sz);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+nfp_dump_indirect_csr_range(struct nfp_pf *pf,
+ struct nfp_dumpspec_csr *spec_csr,
+ struct nfp_dump_state *dump)
+{
+ struct nfp_dump_csr *dump_header = dump->p;
+ u32 reg_sz, header_size, total_size;
+ u32 cpp_rd_addr, max_rd_addr;
+ u32 reg_data_length;
+ void *dest;
+ int err;
+
+ if (!nfp_csr_spec_valid(spec_csr))
+ return nfp_dump_error_tlv(&spec_csr->tl, -EINVAL, dump);
+
+ reg_sz = be32_to_cpu(spec_csr->register_width) / BITS_PER_BYTE;
+ header_size = ALIGN8(sizeof(*dump_header));
+ reg_data_length = be32_to_cpu(spec_csr->cpp.dump_length) *
+ NFP_IND_NUM_CONTEXTS;
+ total_size = header_size + ALIGN8(reg_data_length);
+ dest = dump->p + header_size;
+
+ err = nfp_add_tlv(be32_to_cpu(spec_csr->tl.type), total_size, dump);
+ if (err)
+ return err;
+
+ dump_header->cpp = spec_csr->cpp;
+ dump_header->register_width = spec_csr->register_width;
+
+ cpp_rd_addr = be32_to_cpu(spec_csr->cpp.offset);
+ max_rd_addr = cpp_rd_addr + be32_to_cpu(spec_csr->cpp.dump_length);
+ while (cpp_rd_addr < max_rd_addr) {
+ err = nfp_read_all_indirect_csr_ctx(pf->cpp, spec_csr,
+ cpp_rd_addr, reg_sz, dest);
+ if (err) {
+ dump_header->error = cpu_to_be32(err);
+ dump_header->error_offset = cpu_to_be32(cpp_rd_addr);
+ break;
+ }
+ cpp_rd_addr += reg_sz;
+ dest += reg_sz * NFP_IND_NUM_CONTEXTS;
+ }
+
+ return 0;
+}
+
+static int
+nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec,
+ struct nfp_dump_state *dump)
+{
+ struct nfp_dump_rtsym *dump_header = dump->p;
+ struct nfp_dumpspec_cpp_isl_id cpp_params;
+ struct nfp_rtsym_table *rtbl = pf->rtbl;
+ const struct nfp_rtsym *sym;
+ u32 header_size, total_size;
+ u32 tl_len, key_len;
+ int bytes_read;
+ u32 cpp_id;
+ void *dest;
+ int err;
+
+ tl_len = be32_to_cpu(spec->tl.length);
+ key_len = strnlen(spec->rtsym, tl_len);
+ if (key_len == tl_len)
+ return nfp_dump_error_tlv(&spec->tl, -EINVAL, dump);
+
+ sym = nfp_rtsym_lookup(rtbl, spec->rtsym);
+ if (!sym)
+ return nfp_dump_error_tlv(&spec->tl, -ENOENT, dump);
+
+ header_size =
+ ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1);
+ total_size = header_size + ALIGN8(sym->size);
+ dest = dump->p + header_size;
+
+ err = nfp_add_tlv(be32_to_cpu(spec->tl.type), total_size, dump);
+ if (err)
+ return err;
+
+ dump_header->padded_name_length =
+ header_size - offsetof(struct nfp_dump_rtsym, rtsym);
+ memcpy(dump_header->rtsym, spec->rtsym, key_len + 1);
+
+ if (sym->type == NFP_RTSYM_TYPE_ABS) {
+ dump_header->cpp.dump_length = cpu_to_be32(sizeof(sym->addr));
+ *(u64 *)dest = sym->addr;
+ } else {
+ cpp_params.target = sym->target;
+ cpp_params.action = NFP_CPP_ACTION_RW;
+ cpp_params.token = 0;
+ cpp_params.island = sym->domain;
+ cpp_id = nfp_get_numeric_cpp_id(&cpp_params);
+ dump_header->cpp.cpp_id = cpp_params;
+ dump_header->cpp.offset = cpu_to_be32(sym->addr);
+ dump_header->cpp.dump_length = cpu_to_be32(sym->size);
+ bytes_read = nfp_cpp_read(pf->cpp, cpp_id, sym->addr, dest,
+ sym->size);
+ if (bytes_read != sym->size) {
+ if (bytes_read >= 0)
+ bytes_read = -EIO;
+ dump_header->error = cpu_to_be32(bytes_read);
+ }
+ }
+
+ return 0;
+}
+
+static int
+nfp_dump_for_tlv(struct nfp_pf *pf, struct nfp_dump_tl *tl, void *param)
+{
+ struct nfp_dumpspec_rtsym *spec_rtsym;
+ struct nfp_dump_state *dump = param;
+ struct nfp_dumpspec_csr *spec_csr;
+ int err;
+
+ switch (be32_to_cpu(tl->type)) {
+ case NFP_DUMPSPEC_TYPE_FWNAME:
+ err = nfp_dump_fwname(pf, dump);
+ if (err)
+ return err;
+ break;
+ case NFP_DUMPSPEC_TYPE_CPP_CSR:
+ case NFP_DUMPSPEC_TYPE_XPB_CSR:
+ case NFP_DUMPSPEC_TYPE_ME_CSR:
+ spec_csr = (struct nfp_dumpspec_csr *)tl;
+ err = nfp_dump_csr_range(pf, spec_csr, dump);
+ if (err)
+ return err;
+ break;
+ case NFP_DUMPSPEC_TYPE_INDIRECT_ME_CSR:
+ spec_csr = (struct nfp_dumpspec_csr *)tl;
+ err = nfp_dump_indirect_csr_range(pf, spec_csr, dump);
+ if (err)
+ return err;
+ break;
+ case NFP_DUMPSPEC_TYPE_RTSYM:
+ spec_rtsym = (struct nfp_dumpspec_rtsym *)tl;
+ err = nfp_dump_single_rtsym(pf, spec_rtsym, dump);
+ if (err)
+ return err;
+ break;
+ case NFP_DUMPSPEC_TYPE_HWINFO:
+ err = nfp_dump_hwinfo(pf, tl, dump);
+ if (err)
+ return err;
+ break;
+ case NFP_DUMPSPEC_TYPE_HWINFO_FIELD:
+ err = nfp_dump_hwinfo_field(pf, tl, dump);
+ if (err)
+ return err;
+ break;
+ default:
+ err = nfp_dump_error_tlv(tl, -EOPNOTSUPP, dump);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+nfp_dump_specific_level(struct nfp_pf *pf, struct nfp_dump_tl *dump_level,
+ void *param)
+{
+ struct nfp_dump_state *dump = param;
+
+ if (dump_level->type != dump->requested_level)
+ return 0;
+
+ return nfp_traverse_tlvs(pf, dump_level->data,
+ be32_to_cpu(dump_level->length), dump,
+ nfp_dump_for_tlv);
+}
+
+static int nfp_dump_populate_prolog(struct nfp_dump_state *dump)
+{
+ struct nfp_dump_prolog *prolog = dump->p;
+ u32 total_size;
+ int err;
+
+ total_size = ALIGN8(sizeof(*prolog));
+
+ err = nfp_add_tlv(NFP_DUMPSPEC_TYPE_PROLOG, total_size, dump);
+ if (err)
+ return err;
+
+ prolog->dump_level = dump->requested_level;
+
+ return 0;
+}
+
+int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec,
+ struct ethtool_dump *dump_param, void *dest)
+{
+ struct nfp_dump_state dump;
+ int err;
+
+ dump.requested_level = cpu_to_be32(dump_param->flag);
+ dump.dumped_size = 0;
+ dump.p = dest;
+ dump.buf_size = dump_param->len;
+
+ err = nfp_dump_populate_prolog(&dump);
+ if (err)
+ return err;
+
+ err = nfp_traverse_tlvs(pf, spec->data, spec->size, &dump,
+ nfp_dump_specific_level);
+ if (err)
+ return err;
+
+ /* Set size of actual dump, to trigger warning if different from
+ * calculated size.
+ */
+ dump_param->len = dump.dumped_size;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 2801ecd09eab..00b8c642e672 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -47,18 +47,16 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/ethtool.h>
+#include <linux/firmware.h>
#include "nfpcore/nfp.h"
#include "nfpcore/nfp_nsp.h"
#include "nfp_app.h"
+#include "nfp_main.h"
#include "nfp_net_ctrl.h"
#include "nfp_net.h"
#include "nfp_port.h"
-enum nfp_dump_diag {
- NFP_DUMP_NSP_DIAG = 0,
-};
-
struct nfp_et_stat {
char name[ETH_GSTRING_LEN];
int off;
@@ -1066,15 +1064,34 @@ exit_release:
return ret;
}
+/* Set the dump flag/level. Calculate the dump length for flag > 0 only (new TLV
+ * based dumps), since flag 0 (default) calculates the length in
+ * nfp_app_get_dump_flag(), and we need to support triggering a level 0 dump
+ * without setting the flag first, for backward compatibility.
+ */
static int nfp_app_set_dump(struct net_device *netdev, struct ethtool_dump *val)
{
struct nfp_app *app = nfp_app_from_netdev(netdev);
+ s64 len;
if (!app)
return -EOPNOTSUPP;
- if (val->flag != NFP_DUMP_NSP_DIAG)
- return -EINVAL;
+ if (val->flag == NFP_DUMP_NSP_DIAG) {
+ app->pf->dump_flag = val->flag;
+ return 0;
+ }
+
+ if (!app->pf->dumpspec)
+ return -EOPNOTSUPP;
+
+ len = nfp_net_dump_calculate_size(app->pf, app->pf->dumpspec,
+ val->flag);
+ if (len < 0)
+ return len;
+
+ app->pf->dump_flag = val->flag;
+ app->pf->dump_len = len;
return 0;
}
@@ -1082,14 +1099,37 @@ static int nfp_app_set_dump(struct net_device *netdev, struct ethtool_dump *val)
static int
nfp_app_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
{
- return nfp_dump_nsp_diag(nfp_app_from_netdev(netdev), dump, NULL);
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+
+ if (!app)
+ return -EOPNOTSUPP;
+
+ if (app->pf->dump_flag == NFP_DUMP_NSP_DIAG)
+ return nfp_dump_nsp_diag(app, dump, NULL);
+
+ dump->flag = app->pf->dump_flag;
+ dump->len = app->pf->dump_len;
+
+ return 0;
}
static int
nfp_app_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
void *buffer)
{
- return nfp_dump_nsp_diag(nfp_app_from_netdev(netdev), dump, buffer);
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+
+ if (!app)
+ return -EOPNOTSUPP;
+
+ if (app->pf->dump_flag == NFP_DUMP_NSP_DIAG)
+ return nfp_dump_nsp_diag(app, dump, buffer);
+
+ dump->flag = app->pf->dump_flag;
+ dump->len = app->pf->dump_len;
+
+ return nfp_net_dump_populate_buffer(app->pf, app->pf->dumpspec, dump,
+ buffer);
}
static int nfp_net_set_coalesce(struct net_device *netdev,
@@ -1230,6 +1270,57 @@ static int nfp_net_set_channels(struct net_device *netdev,
return nfp_net_set_num_rings(nn, total_rx, total_tx);
}
+static int
+nfp_net_flash_device(struct net_device *netdev, struct ethtool_flash *flash)
+{
+ const struct firmware *fw;
+ struct nfp_app *app;
+ struct nfp_nsp *nsp;
+ struct device *dev;
+ int err;
+
+ if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ return -EOPNOTSUPP;
+
+ app = nfp_app_from_netdev(netdev);
+ if (!app)
+ return -EOPNOTSUPP;
+
+ dev = &app->pdev->dev;
+
+ nsp = nfp_nsp_open(app->cpp);
+ if (IS_ERR(nsp)) {
+ err = PTR_ERR(nsp);
+ dev_err(dev, "Failed to access the NSP: %d\n", err);
+ return err;
+ }
+
+ err = request_firmware_direct(&fw, flash->data, dev);
+ if (err)
+ goto exit_close_nsp;
+
+ dev_info(dev, "Please be patient while writing flash image: %s\n",
+ flash->data);
+ dev_hold(netdev);
+ rtnl_unlock();
+
+ err = nfp_nsp_write_flash(nsp, fw);
+ if (err < 0) {
+ dev_err(dev, "Flash write failed: %d\n", err);
+ goto exit_rtnl_lock;
+ }
+ dev_info(dev, "Finished writing flash image\n");
+
+exit_rtnl_lock:
+ rtnl_lock();
+ dev_put(netdev);
+ release_firmware(fw);
+
+exit_close_nsp:
+ nfp_nsp_close(nsp);
+ return err;
+}
+
static const struct ethtool_ops nfp_net_ethtool_ops = {
.get_drvinfo = nfp_net_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -1240,6 +1331,7 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
.get_sset_count = nfp_net_get_sset_count,
.get_rxnfc = nfp_net_get_rxnfc,
.set_rxnfc = nfp_net_set_rxnfc,
+ .flash_device = nfp_net_flash_device,
.get_rxfh_indir_size = nfp_net_get_rxfh_indir_size,
.get_rxfh_key_size = nfp_net_get_rxfh_key_size,
.get_rxfh = nfp_net_get_rxfh,
@@ -1265,6 +1357,7 @@ const struct ethtool_ops nfp_port_ethtool_ops = {
.get_strings = nfp_port_get_strings,
.get_ethtool_stats = nfp_port_get_stats,
.get_sset_count = nfp_port_get_sset_count,
+ .flash_device = nfp_net_flash_device,
.set_dump = nfp_app_set_dump,
.get_dump_flag = nfp_app_get_dump_flag,
.get_dump_data = nfp_app_get_dump_data,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
index 3ce51f03126f..ced62d112aa2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
@@ -49,6 +49,8 @@
struct nfp_hwinfo;
struct nfp_hwinfo *nfp_hwinfo_read(struct nfp_cpp *cpp);
const char *nfp_hwinfo_lookup(struct nfp_hwinfo *hwinfo, const char *lookup);
+char *nfp_hwinfo_get_packed_strings(struct nfp_hwinfo *hwinfo);
+u32 nfp_hwinfo_get_packed_str_size(struct nfp_hwinfo *hwinfo);
/* Implemented in nfp_nsp.c, low level functions */
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 04dd5758ecf5..3fcb522d2e85 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -372,8 +372,7 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
* that it can be accessed directly.
*
* NOTE: @address and @size must be 32-bit aligned values.
- *
- * NOTE: The area must also be 'released' when the structure is freed.
+ * The area must also be 'released' when the structure is freed.
*
* Return: NFP CPP Area handle, or NULL
*/
@@ -536,8 +535,7 @@ void nfp_cpp_area_release_free(struct nfp_cpp_area *area)
* Read data from indicated CPP region.
*
* NOTE: @offset and @length must be 32-bit aligned values.
- *
- * NOTE: Area must have been locked down with an 'acquire'.
+ * Area must have been locked down with an 'acquire'.
*
* Return: length of io, or -ERRNO
*/
@@ -558,8 +556,7 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area,
* Write data to indicated CPP region.
*
* NOTE: @offset and @length must be 32-bit aligned values.
- *
- * NOTE: Area must have been locked down with an 'acquire'.
+ * Area must have been locked down with an 'acquire'.
*
* Return: length of io, or -ERRNO
*/
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c
index 4f24aff1e772..063a9a6243d6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_hwinfo.c
@@ -302,3 +302,13 @@ const char *nfp_hwinfo_lookup(struct nfp_hwinfo *hwinfo, const char *lookup)
return NULL;
}
+
+char *nfp_hwinfo_get_packed_strings(struct nfp_hwinfo *hwinfo)
+{
+ return hwinfo->data;
+}
+
+u32 nfp_hwinfo_get_packed_str_size(struct nfp_hwinfo *hwinfo)
+{
+ return le32_to_cpu(hwinfo->size) - sizeof(u32);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index 14a6d1ba51a9..39abac678b71 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -51,6 +51,9 @@
#include "nfp_cpp.h"
#include "nfp_nsp.h"
+#define NFP_NSP_TIMEOUT_DEFAULT 30
+#define NFP_NSP_TIMEOUT_BOOT 30
+
/* Offsets relative to the CSR base */
#define NSP_STATUS 0x00
#define NSP_STATUS_MAGIC GENMASK_ULL(63, 48)
@@ -93,6 +96,7 @@ enum nfp_nsp_cmd {
SPCODE_FW_LOAD = 6, /* Load fw from buffer, len in option */
SPCODE_ETH_RESCAN = 7, /* Rescan ETHs, write ETH_TABLE to buf */
SPCODE_ETH_CONTROL = 8, /* Update media config from buffer */
+ SPCODE_NSP_WRITE_FLASH = 11, /* Load and flash image from buffer */
SPCODE_NSP_SENSORS = 12, /* Read NSP sensor(s) */
SPCODE_NSP_IDENTIFY = 13, /* Read NSP version */
};
@@ -260,10 +264,10 @@ u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state)
}
static int
-nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg,
- u32 nsp_cpp, u64 addr, u64 mask, u64 val)
+nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg, u32 nsp_cpp, u64 addr,
+ u64 mask, u64 val, u32 timeout_sec)
{
- const unsigned long wait_until = jiffies + 30 * HZ;
+ const unsigned long wait_until = jiffies + timeout_sec * HZ;
int err;
for (;;) {
@@ -285,12 +289,13 @@ nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg,
}
/**
- * nfp_nsp_command() - Execute a command on the NFP Service Processor
+ * __nfp_nsp_command() - Execute a command on the NFP Service Processor
* @state: NFP SP state
* @code: NFP SP Command Code
* @option: NFP SP Command Argument
* @buff_cpp: NFP SP Buffer CPP Address info
* @buff_addr: NFP SP Buffer Host address
+ * @timeout_sec:Timeout value to wait for completion in seconds
*
* Return: 0 for success with no result
*
@@ -300,10 +305,11 @@ nfp_nsp_wait_reg(struct nfp_cpp *cpp, u64 *reg,
* -ENODEV if the NSP is not a supported model
* -EBUSY if the NSP is stuck
* -EINTR if interrupted while waiting for completion
- * -ETIMEDOUT if the NSP took longer than 30 seconds to complete
+ * -ETIMEDOUT if the NSP took longer than @timeout_sec seconds to complete
*/
-static int nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option,
- u32 buff_cpp, u64 buff_addr)
+static int
+__nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option, u32 buff_cpp,
+ u64 buff_addr, u32 timeout_sec)
{
u64 reg, ret_val, nsp_base, nsp_buffer, nsp_status, nsp_command;
struct nfp_cpp *cpp = state->cpp;
@@ -341,8 +347,8 @@ static int nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option,
return err;
/* Wait for NSP_COMMAND_START to go to 0 */
- err = nfp_nsp_wait_reg(cpp, &reg,
- nsp_cpp, nsp_command, NSP_COMMAND_START, 0);
+ err = nfp_nsp_wait_reg(cpp, &reg, nsp_cpp, nsp_command,
+ NSP_COMMAND_START, 0, NFP_NSP_TIMEOUT_DEFAULT);
if (err) {
nfp_err(cpp, "Error %d waiting for code 0x%04x to start\n",
err, code);
@@ -350,8 +356,8 @@ static int nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option,
}
/* Wait for NSP_STATUS_BUSY to go to 0 */
- err = nfp_nsp_wait_reg(cpp, &reg,
- nsp_cpp, nsp_status, NSP_STATUS_BUSY, 0);
+ err = nfp_nsp_wait_reg(cpp, &reg, nsp_cpp, nsp_status, NSP_STATUS_BUSY,
+ 0, timeout_sec);
if (err) {
nfp_err(cpp, "Error %d waiting for code 0x%04x to complete\n",
err, code);
@@ -374,9 +380,18 @@ static int nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option,
return ret_val;
}
-static int nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
- const void *in_buf, unsigned int in_size,
- void *out_buf, unsigned int out_size)
+static int
+nfp_nsp_command(struct nfp_nsp *state, u16 code, u32 option, u32 buff_cpp,
+ u64 buff_addr)
+{
+ return __nfp_nsp_command(state, code, option, buff_cpp, buff_addr,
+ NFP_NSP_TIMEOUT_DEFAULT);
+}
+
+static int
+__nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
+ const void *in_buf, unsigned int in_size, void *out_buf,
+ unsigned int out_size, u32 timeout_sec)
{
struct nfp_cpp *cpp = nsp->cpp;
unsigned int max_size;
@@ -429,7 +444,8 @@ static int nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
return err;
}
- ret = nfp_nsp_command(nsp, code, option, cpp_id, cpp_buf);
+ ret = __nfp_nsp_command(nsp, code, option, cpp_id, cpp_buf,
+ timeout_sec);
if (ret < 0)
return ret;
@@ -442,12 +458,23 @@ static int nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
return ret;
}
+static int
+nfp_nsp_command_buf(struct nfp_nsp *nsp, u16 code, u32 option,
+ const void *in_buf, unsigned int in_size, void *out_buf,
+ unsigned int out_size)
+{
+ return __nfp_nsp_command_buf(nsp, code, option, in_buf, in_size,
+ out_buf, out_size,
+ NFP_NSP_TIMEOUT_DEFAULT);
+}
+
int nfp_nsp_wait(struct nfp_nsp *state)
{
- const unsigned long wait_until = jiffies + 30 * HZ;
+ const unsigned long wait_until = jiffies + NFP_NSP_TIMEOUT_BOOT * HZ;
int err;
- nfp_dbg(state->cpp, "Waiting for NSP to respond (30 sec max).\n");
+ nfp_dbg(state->cpp, "Waiting for NSP to respond (%u sec max).\n",
+ NFP_NSP_TIMEOUT_BOOT);
for (;;) {
const unsigned long start_time = jiffies;
@@ -488,6 +515,17 @@ int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw)
fw->size, NULL, 0);
}
+int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw)
+{
+ /* The flash time is specified to take a maximum of 70s so we add an
+ * additional factor to this spec time.
+ */
+ u32 timeout_sec = 2.5 * 70;
+
+ return __nfp_nsp_command_buf(state, SPCODE_NSP_WRITE_FLASH, fw->size,
+ fw->data, fw->size, NULL, 0, timeout_sec);
+}
+
int nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size)
{
return nfp_nsp_command_buf(state, SPCODE_ETH_RESCAN, size, NULL, 0,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index 650ca1a5bd21..e983c9d7f86c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -48,6 +48,7 @@ u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state);
int nfp_nsp_wait(struct nfp_nsp *state);
int nfp_nsp_device_soft_reset(struct nfp_nsp *state);
int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw);
+int nfp_nsp_write_flash(struct nfp_nsp *state, const struct firmware *fw);
int nfp_nsp_mac_reinit(struct nfp_nsp *state);
static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 481876b5424c..cadea673a40f 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -822,7 +822,7 @@ struct fe_priv {
/*
* tx specific fields.
*/
- union ring_type get_tx, put_tx, first_tx, last_tx;
+ union ring_type get_tx, put_tx, last_tx;
struct nv_skb_map *get_tx_ctx, *put_tx_ctx;
struct nv_skb_map *first_tx_ctx, *last_tx_ctx;
struct nv_skb_map *tx_skb;
@@ -1932,7 +1932,8 @@ static void nv_init_tx(struct net_device *dev)
struct fe_priv *np = netdev_priv(dev);
int i;
- np->get_tx = np->put_tx = np->first_tx = np->tx_ring;
+ np->get_tx = np->tx_ring;
+ np->put_tx = np->tx_ring;
if (!nv_optimized(np))
np->last_tx.orig = &np->tx_ring.orig[np->tx_ring_size-1];
@@ -2248,7 +2249,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
offset += bcnt;
size -= bcnt;
if (unlikely(put_tx++ == np->last_tx.orig))
- put_tx = np->first_tx.orig;
+ put_tx = np->tx_ring.orig;
if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
np->put_tx_ctx = np->first_tx_ctx;
} while (size);
@@ -2294,13 +2295,13 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
offset += bcnt;
frag_size -= bcnt;
if (unlikely(put_tx++ == np->last_tx.orig))
- put_tx = np->first_tx.orig;
+ put_tx = np->tx_ring.orig;
if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
np->put_tx_ctx = np->first_tx_ctx;
} while (frag_size);
}
- if (unlikely(put_tx == np->first_tx.orig))
+ if (unlikely(put_tx == np->tx_ring.orig))
prev_tx = np->last_tx.orig;
else
prev_tx = put_tx - 1;
@@ -2406,7 +2407,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
offset += bcnt;
size -= bcnt;
if (unlikely(put_tx++ == np->last_tx.ex))
- put_tx = np->first_tx.ex;
+ put_tx = np->tx_ring.ex;
if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
np->put_tx_ctx = np->first_tx_ctx;
} while (size);
@@ -2452,13 +2453,13 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
offset += bcnt;
frag_size -= bcnt;
if (unlikely(put_tx++ == np->last_tx.ex))
- put_tx = np->first_tx.ex;
+ put_tx = np->tx_ring.ex;
if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
np->put_tx_ctx = np->first_tx_ctx;
} while (frag_size);
}
- if (unlikely(put_tx == np->first_tx.ex))
+ if (unlikely(put_tx == np->tx_ring.ex))
prev_tx = np->last_tx.ex;
else
prev_tx = put_tx - 1;
@@ -2563,7 +2564,7 @@ static int nv_tx_done(struct net_device *dev, int limit)
if (np->desc_ver == DESC_VER_1) {
if (flags & NV_TX_LASTPACKET) {
- if (flags & NV_TX_ERROR) {
+ if (unlikely(flags & NV_TX_ERROR)) {
if ((flags & NV_TX_RETRYERROR)
&& !(flags & NV_TX_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
@@ -2580,7 +2581,7 @@ static int nv_tx_done(struct net_device *dev, int limit)
}
} else {
if (flags & NV_TX2_LASTPACKET) {
- if (flags & NV_TX2_ERROR) {
+ if (unlikely(flags & NV_TX2_ERROR)) {
if ((flags & NV_TX2_RETRYERROR)
&& !(flags & NV_TX2_RETRYCOUNT_MASK))
nv_legacybackoff_reseed(dev);
@@ -2597,7 +2598,7 @@ static int nv_tx_done(struct net_device *dev, int limit)
}
}
if (unlikely(np->get_tx.orig++ == np->last_tx.orig))
- np->get_tx.orig = np->first_tx.orig;
+ np->get_tx.orig = np->tx_ring.orig;
if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
np->get_tx_ctx = np->first_tx_ctx;
}
@@ -2626,7 +2627,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
nv_unmap_txskb(np, np->get_tx_ctx);
if (flags & NV_TX2_LASTPACKET) {
- if (flags & NV_TX2_ERROR) {
+ if (unlikely(flags & NV_TX2_ERROR)) {
if ((flags & NV_TX2_RETRYERROR)
&& !(flags & NV_TX2_RETRYCOUNT_MASK)) {
if (np->driver_data & DEV_HAS_GEAR_MODE)
@@ -2651,7 +2652,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
}
if (unlikely(np->get_tx.ex++ == np->last_tx.ex))
- np->get_tx.ex = np->first_tx.ex;
+ np->get_tx.ex = np->tx_ring.ex;
if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
np->get_tx_ctx = np->first_tx_ctx;
}
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 0a66389c06c2..1cd39c9a0345 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -2502,12 +2502,10 @@ netxen_collect_minidump(struct netxen_adapter *adapter)
{
int ret = 0;
struct netxen_minidump_template_hdr *hdr;
- struct timespec val;
hdr = (struct netxen_minidump_template_hdr *)
adapter->mdump.md_template;
hdr->driver_capture_mask = adapter->mdump.md_capture_mask;
- jiffies_to_timespec(jiffies, &val);
- hdr->driver_timestamp = (u32) val.tv_sec;
+ hdr->driver_timestamp = ktime_get_seconds();
hdr->driver_info_word2 = adapter->fw_version;
hdr->driver_info_word3 = NXRD32(adapter, CRB_DRIVER_VERSION);
ret = netxen_parse_md_template(adapter);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 8f9b3eb82137..57332b3e5e64 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1068,10 +1068,6 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
pci_set_drvdata(pdev, NULL);
- /* Release edev's reference to XDP's bpf if such exist */
- if (edev->xdp_prog)
- bpf_prog_put(edev->xdp_prog);
-
/* Use global ops since we've freed edev */
qed_ops->common->slowpath_stop(cdev);
if (system_state == SYSTEM_POWER_OFF)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index df21e900f874..cedacddf50fb 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -143,11 +143,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- int ingress_format = RMNET_INGRESS_FORMAT_DEMUXING |
- RMNET_INGRESS_FORMAT_DEAGGREGATION |
- RMNET_INGRESS_FORMAT_MAP;
- int egress_format = RMNET_EGRESS_FORMAT_MUXING |
- RMNET_EGRESS_FORMAT_MAP;
+ int ingress_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
struct net_device *real_dev;
int mode = RMNET_EPMODE_VND;
struct rmnet_endpoint *ep;
@@ -181,13 +177,20 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto err2;
- netdev_dbg(dev, "data format [ingress 0x%08X] [egress 0x%08X]\n",
- ingress_format, egress_format);
- port->egress_data_format = egress_format;
- port->ingress_data_format = ingress_format;
port->rmnet_mode = mode;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
+
+ if (data[IFLA_VLAN_FLAGS]) {
+ struct ifla_vlan_flags *flags;
+
+ flags = nla_data(data[IFLA_VLAN_FLAGS]);
+ ingress_format = flags->flags & flags->mask;
+ }
+
+ netdev_dbg(dev, "data format [ingress 0x%08X]\n", ingress_format);
+ port->ingress_data_format = ingress_format;
+
return 0;
err2:
@@ -317,9 +320,49 @@ static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
+static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct rmnet_priv *priv = netdev_priv(dev);
+ struct net_device *real_dev;
+ struct rmnet_endpoint *ep;
+ struct rmnet_port *port;
+ u16 mux_id;
+
+ real_dev = __dev_get_by_index(dev_net(dev),
+ nla_get_u32(tb[IFLA_LINK]));
+
+ if (!real_dev || !dev || !rmnet_is_real_dev_registered(real_dev))
+ return -ENODEV;
+
+ port = rmnet_get_port_rtnl(real_dev);
+
+ if (data[IFLA_VLAN_ID]) {
+ mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ ep = rmnet_get_endpoint(port, priv->mux_id);
+
+ hlist_del_init_rcu(&ep->hlnode);
+ hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
+
+ ep->mux_id = mux_id;
+ priv->mux_id = mux_id;
+ }
+
+ if (data[IFLA_VLAN_FLAGS]) {
+ struct ifla_vlan_flags *flags;
+
+ flags = nla_data(data[IFLA_VLAN_FLAGS]);
+ port->ingress_data_format = flags->flags & flags->mask;
+ }
+
+ return 0;
+}
+
static size_t rmnet_get_size(const struct net_device *dev)
{
- return nla_total_size(2); /* IFLA_VLAN_ID */
+ return nla_total_size(2) /* IFLA_VLAN_ID */ +
+ nla_total_size(sizeof(struct ifla_vlan_flags)); /* IFLA_VLAN_FLAGS */
}
struct rtnl_link_ops rmnet_link_ops __read_mostly = {
@@ -331,6 +374,7 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.newlink = rmnet_newlink,
.dellink = rmnet_dellink,
.get_size = rmnet_get_size,
+ .changelink = rmnet_changelink,
};
/* Needs either rcu_read_lock() or rtnl lock */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index c19259eea99e..2ea9fe326571 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -33,7 +33,6 @@ struct rmnet_endpoint {
struct rmnet_port {
struct net_device *dev;
u32 ingress_data_format;
- u32 egress_data_format;
u8 nr_rmnet_devs;
u8 rmnet_mode;
struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 08e4afc0ab39..05539321ba3a 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -15,6 +15,7 @@
#include <linux/netdevice.h>
#include <linux/netdev_features.h>
+#include <linux/if_arp.h>
#include "rmnet_private.h"
#include "rmnet_config.h"
#include "rmnet_vnd.h"
@@ -104,6 +105,15 @@ rmnet_map_ingress_handler(struct sk_buff *skb,
{
struct sk_buff *skbn;
+ if (skb->dev->type == ARPHRD_ETHER) {
+ if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_KERNEL)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_push(skb, ETH_HLEN);
+ }
+
if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
while ((skbn = rmnet_map_deaggregate(skb)) != NULL)
__rmnet_map_ingress_handler(skbn, port);
@@ -133,20 +143,18 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
if (!map_header)
goto fail;
- if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
- if (mux_id == 0xff)
- map_header->mux_id = 0;
- else
- map_header->mux_id = mux_id;
- }
+ if (mux_id == 0xff)
+ map_header->mux_id = 0;
+ else
+ map_header->mux_id = mux_id;
skb->protocol = htons(ETH_P_MAP);
- return RMNET_MAP_SUCCESS;
+ return 0;
fail:
kfree_skb(skb);
- return RMNET_MAP_CONSUMED;
+ return -ENOMEM;
}
static void
@@ -178,8 +186,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
switch (port->rmnet_mode) {
case RMNET_EPMODE_VND:
- if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP)
- rmnet_map_ingress_handler(skb, port);
+ rmnet_map_ingress_handler(skb, port);
break;
case RMNET_EPMODE_BRIDGE:
rmnet_bridge_handler(skb, port->bridge_ep);
@@ -212,19 +219,8 @@ void rmnet_egress_handler(struct sk_buff *skb)
return;
}
- if (port->egress_data_format & RMNET_EGRESS_FORMAT_MAP) {
- switch (rmnet_map_egress_handler(skb, port, mux_id, orig_dev)) {
- case RMNET_MAP_CONSUMED:
- return;
-
- case RMNET_MAP_SUCCESS:
- break;
-
- default:
- kfree_skb(skb);
- return;
- }
- }
+ if (rmnet_map_egress_handler(skb, port, mux_id, orig_dev))
+ return;
rmnet_vnd_tx_fixup(skb, orig_dev);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 3af3fe7b5457..4df359de28c5 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -30,15 +30,6 @@ struct rmnet_map_control_command {
};
} __aligned(1);
-enum rmnet_map_results {
- RMNET_MAP_SUCCESS,
- RMNET_MAP_CONSUMED,
- RMNET_MAP_GENERAL_FAILURE,
- RMNET_MAP_NOT_ENABLED,
- RMNET_MAP_FAILED_AGGREGATION,
- RMNET_MAP_FAILED_MUX
-};
-
enum rmnet_map_commands {
RMNET_MAP_COMMAND_NONE,
RMNET_MAP_COMMAND_FLOW_DISABLE,
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index 49102f922b31..d21428078504 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -19,14 +19,8 @@
#define RMNET_TX_QUEUE_LEN 1000
/* Constants */
-#define RMNET_EGRESS_FORMAT_MAP BIT(1)
-#define RMNET_EGRESS_FORMAT_AGGREGATION BIT(2)
-#define RMNET_EGRESS_FORMAT_MUXING BIT(3)
-
-#define RMNET_INGRESS_FORMAT_MAP BIT(1)
-#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(2)
-#define RMNET_INGRESS_FORMAT_DEMUXING BIT(3)
-#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(4)
+#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(0)
+#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(1)
/* Replace skb->dev to a virtual rmnet device and pass up the stack */
#define RMNET_EPMODE_VND (1)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 9caa5e387450..5bb29f44d114 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -185,6 +185,9 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
if (ep->egress_dev)
return -EINVAL;
+ if (rmnet_get_endpoint(port, id))
+ return -EBUSY;
+
rc = register_netdevice(rmnet_dev);
if (!rc) {
ep->egress_dev = rmnet_dev;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index fc0d5fa65ad4..857f67bebc6a 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4643,16 +4643,6 @@ static void rtl8169_phy_timer(struct timer_list *t)
rtl_schedule_task(tp, RTL_FLAG_TASK_PHY_PENDING);
}
-static void rtl8169_release_board(struct pci_dev *pdev, struct net_device *dev,
- void __iomem *ioaddr)
-{
- iounmap(ioaddr);
- pci_release_regions(pdev);
- pci_clear_mwi(pdev);
- pci_disable_device(pdev);
- free_netdev(dev);
-}
-
DECLARE_RTL_COND(rtl_phy_reset_cond)
{
return tp->phy_reset_pending(tp);
@@ -4784,14 +4774,6 @@ static int rtl_tbi_ioctl(struct rtl8169_private *tp, struct mii_ioctl_data *data
return -EOPNOTSUPP;
}
-static void rtl_disable_msi(struct pci_dev *pdev, struct rtl8169_private *tp)
-{
- if (tp->features & RTL_FEATURE_MSI) {
- pci_disable_msi(pdev);
- tp->features &= ~RTL_FEATURE_MSI;
- }
-}
-
static void rtl_init_mdio_ops(struct rtl8169_private *tp)
{
struct mdio_ops *ops = &tp->mdio_ops;
@@ -8256,9 +8238,6 @@ static void rtl_remove_one(struct pci_dev *pdev)
unregister_netdev(dev);
- dma_free_coherent(&tp->pci_dev->dev, sizeof(*tp->counters),
- tp->counters, tp->counters_phys_addr);
-
rtl_release_firmware(tp);
if (pci_dev_run_wake(pdev))
@@ -8266,9 +8245,6 @@ static void rtl_remove_one(struct pci_dev *pdev)
/* restore original MAC address */
rtl_rar_set(tp, dev->perm_addr);
-
- rtl_disable_msi(pdev, tp);
- rtl8169_release_board(pdev, dev, tp->mmio_addr);
}
static const struct net_device_ops rtl_netdev_ops = {
@@ -8445,11 +8421,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
MODULENAME, RTL8169_VERSION);
}
- dev = alloc_etherdev(sizeof (*tp));
- if (!dev) {
- rc = -ENOMEM;
- goto out;
- }
+ dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
+ if (!dev)
+ return -ENOMEM;
SET_NETDEV_DEV(dev, &pdev->dev);
dev->netdev_ops = &rtl_netdev_ops;
@@ -8472,13 +8446,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
PCIE_LINK_STATE_CLKPM);
/* enable device (incl. PCI PM wakeup and hotplug setup) */
- rc = pci_enable_device(pdev);
+ rc = pcim_enable_device(pdev);
if (rc < 0) {
netif_err(tp, probe, dev, "enable failure\n");
- goto err_out_free_dev_1;
+ return rc;
}
- if (pci_set_mwi(pdev) < 0)
+ if (pcim_set_mwi(pdev) < 0)
netif_info(tp, probe, dev, "Mem-Wr-Inval unavailable\n");
/* make sure PCI base addr 1 is MMIO */
@@ -8486,30 +8460,28 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netif_err(tp, probe, dev,
"region #%d not an MMIO resource, aborting\n",
region);
- rc = -ENODEV;
- goto err_out_mwi_2;
+ return -ENODEV;
}
/* check for weird/broken PCI region reporting */
if (pci_resource_len(pdev, region) < R8169_REGS_SIZE) {
netif_err(tp, probe, dev,
"Invalid PCI region size(s), aborting\n");
- rc = -ENODEV;
- goto err_out_mwi_2;
+ return -ENODEV;
}
rc = pci_request_regions(pdev, MODULENAME);
if (rc < 0) {
netif_err(tp, probe, dev, "could not request regions\n");
- goto err_out_mwi_2;
+ return rc;
}
/* ioremap MMIO region */
- ioaddr = ioremap(pci_resource_start(pdev, region), R8169_REGS_SIZE);
+ ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
+ R8169_REGS_SIZE);
if (!ioaddr) {
netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
- rc = -EIO;
- goto err_out_free_res_3;
+ return -EIO;
}
tp->mmio_addr = ioaddr;
@@ -8535,7 +8507,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc < 0) {
netif_err(tp, probe, dev, "DMA configuration failed\n");
- goto err_out_unmap_4;
+ return rc;
}
}
@@ -8697,16 +8669,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
- tp->counters = dma_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
- &tp->counters_phys_addr, GFP_KERNEL);
- if (!tp->counters) {
- rc = -ENOMEM;
- goto err_out_msi_5;
- }
+ tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
+ &tp->counters_phys_addr,
+ GFP_KERNEL);
+ if (!tp->counters)
+ return -ENOMEM;
rc = register_netdev(dev);
if (rc < 0)
- goto err_out_cnt_6;
+ return rc;
pci_set_drvdata(pdev, dev);
@@ -8735,25 +8706,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netif_carrier_off(dev);
-out:
- return rc;
-
-err_out_cnt_6:
- dma_free_coherent(&pdev->dev, sizeof(*tp->counters), tp->counters,
- tp->counters_phys_addr);
-err_out_msi_5:
- netif_napi_del(&tp->napi);
- rtl_disable_msi(pdev, tp);
-err_out_unmap_4:
- iounmap(ioaddr);
-err_out_free_res_3:
- pci_release_regions(pdev);
-err_out_mwi_2:
- pci_clear_mwi(pdev);
- pci_disable_device(pdev);
-err_out_free_dev_1:
- free_netdev(dev);
- goto out;
+ return 0;
}
static struct pci_driver rtl8169_pci_driver = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7250539d0bd..c52a9963c19d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1997,22 +1997,60 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
static void stmmac_dma_interrupt(struct stmmac_priv *priv)
{
u32 tx_channel_count = priv->plat->tx_queues_to_use;
- int status;
+ u32 rx_channel_count = priv->plat->rx_queues_to_use;
+ u32 channels_to_check = tx_channel_count > rx_channel_count ?
+ tx_channel_count : rx_channel_count;
u32 chan;
+ bool poll_scheduled = false;
+ int status[channels_to_check];
+
+ /* Each DMA channel can be used for rx and tx simultaneously, yet
+ * napi_struct is embedded in struct stmmac_rx_queue rather than in a
+ * stmmac_channel struct.
+ * Because of this, stmmac_poll currently checks (and possibly wakes)
+ * all tx queues rather than just a single tx queue.
+ */
+ for (chan = 0; chan < channels_to_check; chan++)
+ status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr,
+ &priv->xstats,
+ chan);
- for (chan = 0; chan < tx_channel_count; chan++) {
- struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+ for (chan = 0; chan < rx_channel_count; chan++) {
+ if (likely(status[chan] & handle_rx)) {
+ struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
- status = priv->hw->dma->dma_interrupt(priv->ioaddr,
- &priv->xstats, chan);
- if (likely((status & handle_rx)) || (status & handle_tx)) {
if (likely(napi_schedule_prep(&rx_q->napi))) {
stmmac_disable_dma_irq(priv, chan);
__napi_schedule(&rx_q->napi);
+ poll_scheduled = true;
+ }
+ }
+ }
+
+ /* If we scheduled poll, we already know that tx queues will be checked.
+ * If we didn't schedule poll, see if any DMA channel (used by tx) has a
+ * completed transmission, if so, call stmmac_poll (once).
+ */
+ if (!poll_scheduled) {
+ for (chan = 0; chan < tx_channel_count; chan++) {
+ if (status[chan] & handle_tx) {
+ /* It doesn't matter what rx queue we choose
+ * here. We use 0 since it always exists.
+ */
+ struct stmmac_rx_queue *rx_q =
+ &priv->rx_queue[0];
+
+ if (likely(napi_schedule_prep(&rx_q->napi))) {
+ stmmac_disable_dma_irq(priv, chan);
+ __napi_schedule(&rx_q->napi);
+ }
+ break;
}
}
+ }
- if (unlikely(status & tx_hard_error_bump_tc)) {
+ for (chan = 0; chan < tx_channel_count; chan++) {
+ if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
/* Try to bump up the dma threshold on this failure */
if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
(tc <= 256)) {
@@ -2029,7 +2067,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
chan);
priv->xstats.threshold = tc;
}
- } else if (unlikely(status == tx_hard_error)) {
+ } else if (unlikely(status[chan] == tx_hard_error)) {
stmmac_tx_err(priv, chan);
}
}
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index a73600dceb8b..3c85a0885f9b 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -88,6 +88,7 @@ do { \
#define CPSW_VERSION_4 0x190112
#define HOST_PORT_NUM 0
+#define CPSW_ALE_PORTS_NUM 3
#define SLIVER_SIZE 0x40
#define CPSW1_HOST_PORT_OFFSET 0x028
@@ -352,6 +353,27 @@ struct cpsw_hw_stats {
u32 rxdmaoverruns;
};
+struct cpsw_slave_data {
+ struct device_node *phy_node;
+ char phy_id[MII_BUS_ID_SIZE];
+ int phy_if;
+ u8 mac_addr[ETH_ALEN];
+ u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */
+};
+
+struct cpsw_platform_data {
+ struct cpsw_slave_data *slave_data;
+ u32 ss_reg_ofs; /* Subsystem control register offset */
+ u32 channels; /* number of cpdma channels (symmetric) */
+ u32 slaves; /* number of slave cpgmac ports */
+ u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
+ u32 ale_entries; /* ale table size */
+ u32 bd_ram_size; /*buffer descriptor ram size */
+ u32 mac_control; /* Mac control register */
+ u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/
+ bool dual_emac; /* Enable Dual EMAC mode */
+};
+
struct cpsw_slave {
void __iomem *regs;
struct cpsw_sliver_regs __iomem *sliver;
@@ -365,12 +387,12 @@ struct cpsw_slave {
static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
{
- return __raw_readl(slave->regs + offset);
+ return readl_relaxed(slave->regs + offset);
}
static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset)
{
- __raw_writel(val, slave->regs + offset);
+ writel_relaxed(val, slave->regs + offset);
}
struct cpsw_vector {
@@ -660,8 +682,8 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
static void cpsw_intr_enable(struct cpsw_common *cpsw)
{
- __raw_writel(0xFF, &cpsw->wr_regs->tx_en);
- __raw_writel(0xFF, &cpsw->wr_regs->rx_en);
+ writel_relaxed(0xFF, &cpsw->wr_regs->tx_en);
+ writel_relaxed(0xFF, &cpsw->wr_regs->rx_en);
cpdma_ctlr_int_ctrl(cpsw->dma, true);
return;
@@ -669,8 +691,8 @@ static void cpsw_intr_enable(struct cpsw_common *cpsw)
static void cpsw_intr_disable(struct cpsw_common *cpsw)
{
- __raw_writel(0, &cpsw->wr_regs->tx_en);
- __raw_writel(0, &cpsw->wr_regs->rx_en);
+ writel_relaxed(0, &cpsw->wr_regs->tx_en);
+ writel_relaxed(0, &cpsw->wr_regs->rx_en);
cpdma_ctlr_int_ctrl(cpsw->dma, false);
return;
@@ -949,18 +971,14 @@ static inline void soft_reset(const char *module, void __iomem *reg)
{
unsigned long timeout = jiffies + HZ;
- __raw_writel(1, reg);
+ writel_relaxed(1, reg);
do {
cpu_relax();
- } while ((__raw_readl(reg) & 1) && time_after(timeout, jiffies));
+ } while ((readl_relaxed(reg) & 1) && time_after(timeout, jiffies));
- WARN(__raw_readl(reg) & 1, "failed to soft-reset %s\n", module);
+ WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module);
}
-#define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \
- ((mac)[2] << 16) | ((mac)[3] << 24))
-#define mac_lo(mac) (((mac)[4] << 0) | ((mac)[5] << 8))
-
static void cpsw_set_slave_mac(struct cpsw_slave *slave,
struct cpsw_priv *priv)
{
@@ -1015,7 +1033,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
if (mac_control != slave->mac_control) {
phy_print_status(phy);
- __raw_writel(mac_control, &slave->sliver->mac_control);
+ writel_relaxed(mac_control, &slave->sliver->mac_control);
}
slave->mac_control = mac_control;
@@ -1278,7 +1296,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
soft_reset_slave(slave);
/* setup priority mapping */
- __raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map);
+ writel_relaxed(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map);
switch (cpsw->version) {
case CPSW_VERSION_1:
@@ -1304,7 +1322,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
}
/* setup max packet size, and mac address */
- __raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen);
+ writel_relaxed(cpsw->rx_packet_max, &slave->sliver->rx_maxlen);
cpsw_set_slave_mac(slave, priv);
slave->mac_control = 0; /* no link yet */
@@ -1395,9 +1413,9 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl);
/* setup host port priority mapping */
- __raw_writel(CPDMA_TX_PRIORITY_MAP,
- &cpsw->host_port_regs->cpdma_tx_pri_map);
- __raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map);
+ writel_relaxed(CPDMA_TX_PRIORITY_MAP,
+ &cpsw->host_port_regs->cpdma_tx_pri_map);
+ writel_relaxed(0, &cpsw->host_port_regs->cpdma_rx_chan_map);
cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM,
ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
@@ -1514,10 +1532,10 @@ static int cpsw_ndo_open(struct net_device *ndev)
/* initialize shared resources for every ndev */
if (!cpsw->usage_count) {
/* disable priority elevation */
- __raw_writel(0, &cpsw->regs->ptype);
+ writel_relaxed(0, &cpsw->regs->ptype);
/* enable statistics collection only on all ports */
- __raw_writel(0x7, &cpsw->regs->stat_port_en);
+ writel_relaxed(0x7, &cpsw->regs->stat_port_en);
/* Enable internal fifo flow control */
writel(0x7, &cpsw->regs->flow_control);
@@ -1701,7 +1719,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE);
slave_write(slave, ctrl, CPSW2_CONTROL);
- __raw_writel(ETH_P_1588, &cpsw->regs->ts_ltype);
+ writel_relaxed(ETH_P_1588, &cpsw->regs->ts_ltype);
}
static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
@@ -2298,7 +2316,6 @@ static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
{
- int (*poll)(struct napi_struct *, int);
struct cpsw_common *cpsw = priv->cpsw;
void (*handler)(void *, int, int);
struct netdev_queue *queue;
@@ -2309,12 +2326,10 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
ch = &cpsw->rx_ch_num;
vec = cpsw->rxv;
handler = cpsw_rx_handler;
- poll = cpsw_rx_poll;
} else {
ch = &cpsw->tx_ch_num;
vec = cpsw->txv;
handler = cpsw_tx_handler;
- poll = cpsw_tx_poll;
}
while (*ch < ch_num) {
@@ -3050,17 +3065,23 @@ static int cpsw_probe(struct platform_device *pdev)
}
cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+ if (IS_ERR(cpsw->txv[0].ch)) {
+ dev_err(priv->dev, "error initializing tx dma channel\n");
+ ret = PTR_ERR(cpsw->txv[0].ch);
+ goto clean_dma_ret;
+ }
+
cpsw->rxv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
- if (WARN_ON(!cpsw->rxv[0].ch || !cpsw->txv[0].ch)) {
- dev_err(priv->dev, "error initializing dma channels\n");
- ret = -ENOMEM;
+ if (IS_ERR(cpsw->rxv[0].ch)) {
+ dev_err(priv->dev, "error initializing rx dma channel\n");
+ ret = PTR_ERR(cpsw->rxv[0].ch);
goto clean_dma_ret;
}
ale_params.dev = &pdev->dev;
ale_params.ale_ageout = ale_ageout;
ale_params.ale_entries = data->ale_entries;
- ale_params.ale_ports = data->slaves;
+ ale_params.ale_ports = CPSW_ALE_PORTS_NUM;
cpsw->ale = cpsw_ale_create(&ale_params);
if (!cpsw->ale) {
@@ -3072,14 +3093,14 @@ static int cpsw_probe(struct platform_device *pdev)
cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
if (IS_ERR(cpsw->cpts)) {
ret = PTR_ERR(cpsw->cpts);
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
ndev->irq = platform_get_irq(pdev, 1);
if (ndev->irq < 0) {
dev_err(priv->dev, "error getting irq resource\n");
ret = ndev->irq;
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
of_id = of_match_device(cpsw_of_mtable, &pdev->dev);
@@ -3103,7 +3124,7 @@ static int cpsw_probe(struct platform_device *pdev)
if (ret) {
dev_err(priv->dev, "error registering net device\n");
ret = -ENODEV;
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
if (cpsw->data.dual_emac) {
@@ -3126,7 +3147,7 @@ static int cpsw_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 1);
if (irq < 0) {
ret = irq;
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
cpsw->irqs_table[0] = irq;
@@ -3134,14 +3155,14 @@ static int cpsw_probe(struct platform_device *pdev)
0, dev_name(&pdev->dev), cpsw);
if (ret < 0) {
dev_err(priv->dev, "error attaching irq (%d)\n", ret);
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
/* TX IRQ */
irq = platform_get_irq(pdev, 2);
if (irq < 0) {
ret = irq;
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
cpsw->irqs_table[1] = irq;
@@ -3149,7 +3170,7 @@ static int cpsw_probe(struct platform_device *pdev)
0, dev_name(&pdev->dev), cpsw);
if (ret < 0) {
dev_err(priv->dev, "error attaching irq (%d)\n", ret);
- goto clean_ale_ret;
+ goto clean_dma_ret;
}
cpsw_notice(priv, probe,
@@ -3162,8 +3183,6 @@ static int cpsw_probe(struct platform_device *pdev)
clean_unregister_netdev_ret:
unregister_netdev(ndev);
-clean_ale_ret:
- cpsw_ale_destroy(cpsw->ale);
clean_dma_ret:
cpdma_ctlr_destroy(cpsw->dma);
clean_dt_ret:
@@ -3193,7 +3212,6 @@ static int cpsw_remove(struct platform_device *pdev)
unregister_netdev(ndev);
cpts_release(cpsw->cpts);
- cpsw_ale_destroy(cpsw->ale);
cpdma_ctlr_destroy(cpsw->dma);
cpsw_remove_dt(pdev);
pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index 6c3037aa2cd3..cf111db3dc27 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -17,26 +17,9 @@
#include <linux/if_ether.h>
#include <linux/phy.h>
-struct cpsw_slave_data {
- struct device_node *phy_node;
- char phy_id[MII_BUS_ID_SIZE];
- int phy_if;
- u8 mac_addr[ETH_ALEN];
- u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */
-};
-
-struct cpsw_platform_data {
- struct cpsw_slave_data *slave_data;
- u32 ss_reg_ofs; /* Subsystem control register offset */
- u32 channels; /* number of cpdma channels (symmetric) */
- u32 slaves; /* number of slave cpgmac ports */
- u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
- u32 ale_entries; /* ale table size */
- u32 bd_ram_size; /*buffer descriptor ram size */
- u32 mac_control; /* Mac control register */
- u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/
- bool dual_emac; /* Enable Dual EMAC mode */
-};
+#define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \
+ ((mac)[2] << 16) | ((mac)[3] << 24))
+#define mac_lo(mac) (((mac)[4] << 0) | ((mac)[5] << 8))
void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave);
int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index b432a75fb874..93dc05c194d3 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -150,11 +150,11 @@ static int cpsw_ale_read(struct cpsw_ale *ale, int idx, u32 *ale_entry)
WARN_ON(idx > ale->params.ale_entries);
- __raw_writel(idx, ale->params.ale_regs + ALE_TABLE_CONTROL);
+ writel_relaxed(idx, ale->params.ale_regs + ALE_TABLE_CONTROL);
for (i = 0; i < ALE_ENTRY_WORDS; i++)
- ale_entry[i] = __raw_readl(ale->params.ale_regs +
- ALE_TABLE + 4 * i);
+ ale_entry[i] = readl_relaxed(ale->params.ale_regs +
+ ALE_TABLE + 4 * i);
return idx;
}
@@ -166,11 +166,11 @@ static int cpsw_ale_write(struct cpsw_ale *ale, int idx, u32 *ale_entry)
WARN_ON(idx > ale->params.ale_entries);
for (i = 0; i < ALE_ENTRY_WORDS; i++)
- __raw_writel(ale_entry[i], ale->params.ale_regs +
- ALE_TABLE + 4 * i);
+ writel_relaxed(ale_entry[i], ale->params.ale_regs +
+ ALE_TABLE + 4 * i);
- __raw_writel(idx | ALE_TABLE_WRITE, ale->params.ale_regs +
- ALE_TABLE_CONTROL);
+ writel_relaxed(idx | ALE_TABLE_WRITE, ale->params.ale_regs +
+ ALE_TABLE_CONTROL);
return idx;
}
@@ -723,7 +723,7 @@ int cpsw_ale_control_set(struct cpsw_ale *ale, int port, int control,
if (info->port_offset == 0 && info->port_shift == 0)
port = 0; /* global, port is a dont care */
- if (port < 0 || port > ale->params.ale_ports)
+ if (port < 0 || port >= ale->params.ale_ports)
return -EINVAL;
mask = BITMASK(info->bits);
@@ -733,9 +733,9 @@ int cpsw_ale_control_set(struct cpsw_ale *ale, int port, int control,
offset = info->offset + (port * info->port_offset);
shift = info->shift + (port * info->port_shift);
- tmp = __raw_readl(ale->params.ale_regs + offset);
+ tmp = readl_relaxed(ale->params.ale_regs + offset);
tmp = (tmp & ~(mask << shift)) | (value << shift);
- __raw_writel(tmp, ale->params.ale_regs + offset);
+ writel_relaxed(tmp, ale->params.ale_regs + offset);
return 0;
}
@@ -754,13 +754,13 @@ int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control)
if (info->port_offset == 0 && info->port_shift == 0)
port = 0; /* global, port is a dont care */
- if (port < 0 || port > ale->params.ale_ports)
+ if (port < 0 || port >= ale->params.ale_ports)
return -EINVAL;
offset = info->offset + (port * info->port_offset);
shift = info->shift + (port * info->port_shift);
- tmp = __raw_readl(ale->params.ale_regs + offset) >> shift;
+ tmp = readl_relaxed(ale->params.ale_regs + offset) >> shift;
return tmp & BITMASK(info->bits);
}
EXPORT_SYMBOL_GPL(cpsw_ale_control_get);
@@ -779,9 +779,37 @@ static void cpsw_ale_timer(struct timer_list *t)
void cpsw_ale_start(struct cpsw_ale *ale)
{
+ cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
+ cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1);
+
+ timer_setup(&ale->timer, cpsw_ale_timer, 0);
+ if (ale->ageout) {
+ ale->timer.expires = jiffies + ale->ageout;
+ add_timer(&ale->timer);
+ }
+}
+EXPORT_SYMBOL_GPL(cpsw_ale_start);
+
+void cpsw_ale_stop(struct cpsw_ale *ale)
+{
+ del_timer_sync(&ale->timer);
+ cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0);
+}
+EXPORT_SYMBOL_GPL(cpsw_ale_stop);
+
+struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params)
+{
+ struct cpsw_ale *ale;
u32 rev, ale_entries;
- rev = __raw_readl(ale->params.ale_regs + ALE_IDVER);
+ ale = devm_kzalloc(params->dev, sizeof(*ale), GFP_KERNEL);
+ if (!ale)
+ return NULL;
+
+ ale->params = *params;
+ ale->ageout = ale->params.ale_ageout * HZ;
+
+ rev = readl_relaxed(ale->params.ale_regs + ALE_IDVER);
if (!ale->params.major_ver_mask)
ale->params.major_ver_mask = 0xff;
ale->version =
@@ -793,8 +821,8 @@ void cpsw_ale_start(struct cpsw_ale *ale)
if (!ale->params.ale_entries) {
ale_entries =
- __raw_readl(ale->params.ale_regs + ALE_STATUS) &
- ALE_STATUS_SIZE_MASK;
+ readl_relaxed(ale->params.ale_regs + ALE_STATUS) &
+ ALE_STATUS_SIZE_MASK;
/* ALE available on newer NetCP switches has introduced
* a register, ALE_STATUS, to indicate the size of ALE
* table which shows the size as a multiple of 1024 entries.
@@ -816,9 +844,9 @@ void cpsw_ale_start(struct cpsw_ale *ale)
"ALE Table size %ld\n", ale->params.ale_entries);
/* set default bits for existing h/w */
- ale->port_mask_bits = 3;
- ale->port_num_bits = 2;
- ale->vlan_field_bits = 3;
+ ale->port_mask_bits = ale->params.ale_ports;
+ ale->port_num_bits = order_base_2(ale->params.ale_ports);
+ ale->vlan_field_bits = ale->params.ale_ports;
/* Set defaults override for ALE on NetCP NU switch and for version
* 1R3
@@ -847,57 +875,12 @@ void cpsw_ale_start(struct cpsw_ale *ale)
ale_controls[ALE_PORT_UNTAGGED_EGRESS].shift = 0;
ale_controls[ALE_PORT_UNTAGGED_EGRESS].offset =
ALE_UNKNOWNVLAN_FORCE_UNTAG_EGRESS;
- ale->port_mask_bits = ale->params.ale_ports;
- ale->port_num_bits = ale->params.ale_ports - 1;
- ale->vlan_field_bits = ale->params.ale_ports;
- } else if (ale->version == ALE_VERSION_1R3) {
- ale->port_mask_bits = ale->params.ale_ports;
- ale->port_num_bits = 3;
- ale->vlan_field_bits = ale->params.ale_ports;
}
- cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
- cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1);
-
- timer_setup(&ale->timer, cpsw_ale_timer, 0);
- if (ale->ageout) {
- ale->timer.expires = jiffies + ale->ageout;
- add_timer(&ale->timer);
- }
-}
-EXPORT_SYMBOL_GPL(cpsw_ale_start);
-
-void cpsw_ale_stop(struct cpsw_ale *ale)
-{
- del_timer_sync(&ale->timer);
-}
-EXPORT_SYMBOL_GPL(cpsw_ale_stop);
-
-struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params)
-{
- struct cpsw_ale *ale;
-
- ale = kzalloc(sizeof(*ale), GFP_KERNEL);
- if (!ale)
- return NULL;
-
- ale->params = *params;
- ale->ageout = ale->params.ale_ageout * HZ;
-
return ale;
}
EXPORT_SYMBOL_GPL(cpsw_ale_create);
-int cpsw_ale_destroy(struct cpsw_ale *ale)
-{
- if (!ale)
- return -EINVAL;
- cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0);
- kfree(ale);
- return 0;
-}
-EXPORT_SYMBOL_GPL(cpsw_ale_destroy);
-
void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data)
{
int i;
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index 25d24e8d0904..d4fe9016429b 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -100,7 +100,6 @@ enum cpsw_ale_port_state {
#define ALE_ENTRY_WORDS DIV_ROUND_UP(ALE_ENTRY_BITS, 32)
struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params);
-int cpsw_ale_destroy(struct cpsw_ale *ale);
void cpsw_ale_start(struct cpsw_ale *ale);
void cpsw_ale_stop(struct cpsw_ale *ale);
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index e4d6edf387b3..6f9173ff9414 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -893,7 +893,7 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
chan_num = rx_type ? rx_chan_num(chan_num) : tx_chan_num(chan_num);
if (__chan_linear(chan_num) >= ctlr->num_chan)
- return NULL;
+ return ERR_PTR(-EINVAL);
chan = devm_kzalloc(ctlr->dev, sizeof(*chan), GFP_KERNEL);
if (!chan)
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 4bb561856af5..abceea802ea1 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1385,11 +1385,6 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd)
return -EOPNOTSUPP;
}
-static int match_first_device(struct device *dev, void *data)
-{
- return !strncmp(dev_name(dev), "davinci_mdio", 12);
-}
-
/**
* emac_dev_open - EMAC device open
* @ndev: The DaVinci EMAC network adapter
@@ -1489,8 +1484,8 @@ static int emac_dev_open(struct net_device *ndev)
/* use the first phy on the bus if pdata did not give us a phy id */
if (!phydev && !priv->phy_id) {
- phy = bus_find_device(&mdio_bus_type, NULL, NULL,
- match_first_device);
+ phy = bus_find_device_by_name(&mdio_bus_type, NULL,
+ "davinci_mdio");
if (phy) {
priv->phy_id = dev_name(phy);
if (!priv->phy_id || !*priv->phy_id)
@@ -1875,10 +1870,17 @@ static int davinci_emac_probe(struct platform_device *pdev)
priv->txchan = cpdma_chan_create(priv->dma, EMAC_DEF_TX_CH,
emac_tx_handler, 0);
+ if (IS_ERR(priv->txchan)) {
+ dev_err(&pdev->dev, "error initializing tx dma channel\n");
+ rc = PTR_ERR(priv->txchan);
+ goto no_cpdma_chan;
+ }
+
priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH,
emac_rx_handler, 1);
- if (WARN_ON(!priv->txchan || !priv->rxchan)) {
- rc = -ENOMEM;
+ if (IS_ERR(priv->rxchan)) {
+ dev_err(&pdev->dev, "error initializing rx dma channel\n");
+ rc = PTR_ERR(priv->rxchan);
goto no_cpdma_chan;
}
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index e831c49713ee..56dbc0b9fedc 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -27,6 +27,7 @@
#include <linux/net_tstamp.h>
#include <linux/ethtool.h>
+#include "cpsw.h"
#include "cpsw_ale.h"
#include "netcp.h"
#include "cpts.h"
@@ -2047,10 +2048,6 @@ static const struct ethtool_ops keystone_ethtool_ops = {
.get_ts_info = keystone_get_ts_info,
};
-#define mac_hi(mac) (((mac)[0] << 0) | ((mac)[1] << 8) | \
- ((mac)[2] << 16) | ((mac)[3] << 24))
-#define mac_lo(mac) (((mac)[4] << 0) | ((mac)[5] << 8))
-
static void gbe_set_slave_mac(struct gbe_slave *slave,
struct gbe_intf *gbe_intf)
{
@@ -3692,7 +3689,6 @@ static int gbe_remove(struct netcp_device *netcp_device, void *inst_priv)
del_timer_sync(&gbe_dev->timer);
cpts_release(gbe_dev->cpts);
cpsw_ale_stop(gbe_dev->ale);
- cpsw_ale_destroy(gbe_dev->ale);
netcp_txpipe_close(&gbe_dev->tx_pipe);
free_secondary_ports(gbe_dev);
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 88ddfb92122b..0db3bd1ea06f 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -146,7 +146,6 @@ struct hv_netvsc_packet {
struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
- int ring_size;
u32 num_chn;
u32 send_sections;
u32 recv_sections;
@@ -188,18 +187,22 @@ struct rndis_message;
struct netvsc_device;
struct net_device_context;
+extern u32 netvsc_ring_bytes;
+extern struct reciprocal_value netvsc_ring_reciprocal;
+
struct netvsc_device *netvsc_device_add(struct hv_device *device,
const struct netvsc_device_info *info);
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
void netvsc_device_remove(struct hv_device *device);
-int netvsc_send(struct net_device_context *ndc,
+int netvsc_send(struct net_device *net,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer *page_buffer,
struct sk_buff *skb);
-void netvsc_linkstatus_callback(struct hv_device *device_obj,
+void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp);
int netvsc_recv_callback(struct net_device *net,
+ struct netvsc_device *nvdev,
struct vmbus_channel *channel,
void *data, u32 len,
const struct ndis_tcp_ip_checksum_info *csum_info,
@@ -220,7 +223,6 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
const u8 *key);
int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev,
- struct hv_device *dev,
struct vmbus_channel *channel,
void *data, u32 buflen);
@@ -635,14 +637,27 @@ struct nvsp_message {
#define NETVSC_MTU 65535
#define NETVSC_MTU_MIN ETH_MIN_MTU
-#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
-#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
-#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */
+/* Max buffer sizes allowed by a host */
+#define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024 * 1024 * 15) /* 15MB */
+#define NETVSC_RECEIVE_BUFFER_DEFAULT (1024 * 1024 * 16)
+
+#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */
+#define NETVSC_SEND_BUFFER_DEFAULT (1024 * 1024)
+
#define NETVSC_INVALID_INDEX -1
#define NETVSC_SEND_SECTION_SIZE 6144
#define NETVSC_RECV_SECTION_SIZE 1728
+/* Default size of TX buf: 1MB, RX buf: 16MB */
+#define NETVSC_MIN_TX_SECTIONS 10
+#define NETVSC_DEFAULT_TX (NETVSC_SEND_BUFFER_DEFAULT \
+ / NETVSC_SEND_SECTION_SIZE)
+#define NETVSC_MIN_RX_SECTIONS 10
+#define NETVSC_DEFAULT_RX (NETVSC_RECEIVE_BUFFER_DEFAULT \
+ / NETVSC_RECV_SECTION_SIZE)
+
#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
#define NETVSC_SEND_BUFFER_ID 0
@@ -690,6 +705,7 @@ struct netvsc_ethtool_stats {
unsigned long tx_busy;
unsigned long tx_send_full;
unsigned long rx_comp_busy;
+ unsigned long rx_no_memory;
unsigned long stop_queue;
unsigned long wake_queue;
};
@@ -804,13 +820,9 @@ struct netvsc_device {
struct rndis_device *extension;
- int ring_size;
-
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
- atomic_t open_cnt;
-
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
struct rcu_head rcu;
@@ -1425,32 +1437,6 @@ struct rndis_message {
(sizeof(msg) + (sizeof(struct rndis_message) - \
sizeof(union rndis_message_container)))
-/* get pointer to info buffer with message pointer */
-#define MESSAGE_TO_INFO_BUFFER(msg) \
- (((unsigned char *)(msg)) + msg->info_buf_offset)
-
-/* get pointer to status buffer with message pointer */
-#define MESSAGE_TO_STATUS_BUFFER(msg) \
- (((unsigned char *)(msg)) + msg->status_buf_offset)
-
-/* get pointer to OOBD buffer with message pointer */
-#define MESSAGE_TO_OOBD_BUFFER(msg) \
- (((unsigned char *)(msg)) + msg->oob_data_offset)
-
-/* get pointer to data buffer with message pointer */
-#define MESSAGE_TO_DATA_BUFFER(msg) \
- (((unsigned char *)(msg)) + msg->per_pkt_info_offset)
-
-/* get pointer to contained message from NDIS_MESSAGE pointer */
-#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg) \
- ((void *) &rndis_msg->msg)
-
-/* get pointer to contained message from NDIS_MESSAGE pointer */
-#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg) \
- ((void *) rndis_msg)
-
-
-
#define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \
sizeof(union rndis_message_container))
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index bfc79698b8f4..17e529af79dc 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -31,6 +31,7 @@
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
+#include <linux/reciprocal_div.h>
#include <asm/sync_bitops.h>
@@ -72,7 +73,7 @@ static struct netvsc_device *alloc_net_device(void)
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
- atomic_set(&net_device->open_cnt, 0);
+
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
@@ -267,6 +268,11 @@ static int netvsc_init_buf(struct hv_device *device,
buf_size = device_info->recv_sections * device_info->recv_section_size;
buf_size = roundup(buf_size, PAGE_SIZE);
+ /* Legacy hosts only allow smaller receive buffer */
+ if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+ buf_size = min_t(unsigned int, buf_size,
+ NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
+
net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
netdev_err(ndev,
@@ -588,14 +594,11 @@ void netvsc_device_remove(struct hv_device *device)
* Get the percentage of available bytes to write in the ring.
* The return value is in range from 0 to 100.
*/
-static inline u32 hv_ringbuf_avail_percent(
- struct hv_ring_buffer_info *ring_info)
+static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info *ring_info)
{
- u32 avail_read, avail_write;
-
- hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
+ u32 avail_write = hv_get_bytes_to_write(ring_info);
- return avail_write * 100 / ring_info->ring_datasize;
+ return reciprocal_divide(avail_write * 100, netvsc_ring_reciprocal);
}
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
@@ -698,26 +701,26 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
return NETVSC_INVALID_INDEX;
}
-static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
- unsigned int section_index,
- u32 pend_size,
- struct hv_netvsc_packet *packet,
- struct rndis_message *rndis_msg,
- struct hv_page_buffer *pb,
- struct sk_buff *skb)
+static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ unsigned int section_index,
+ u32 pend_size,
+ struct hv_netvsc_packet *packet,
+ struct rndis_message *rndis_msg,
+ struct hv_page_buffer *pb,
+ bool xmit_more)
{
char *start = net_device->send_buf;
char *dest = start + (section_index * net_device->send_section_size)
+ pend_size;
int i;
- u32 msg_size = 0;
u32 padding = 0;
- u32 remain = packet->total_data_buflen % net_device->pkt_align;
u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
packet->page_buf_cnt;
+ u32 remain;
/* Add padding */
- if (skb->xmit_more && remain && !packet->cp_partial) {
+ remain = packet->total_data_buflen & (net_device->pkt_align - 1);
+ if (xmit_more && remain) {
padding = net_device->pkt_align - remain;
rndis_msg->msg_len += padding;
packet->total_data_buflen += padding;
@@ -729,16 +732,11 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
u32 len = pb[i].len;
memcpy(dest, (src + offset), len);
- msg_size += len;
dest += len;
}
- if (padding) {
+ if (padding)
memset(dest, 0, padding);
- msg_size += padding;
- }
-
- return msg_size;
}
static inline int netvsc_send_pkt(
@@ -831,12 +829,13 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
}
/* RCU already held by caller */
-int netvsc_send(struct net_device_context *ndev_ctx,
+int netvsc_send(struct net_device *ndev,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
struct hv_page_buffer *pb,
struct sk_buff *skb)
{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct netvsc_device *net_device
= rcu_dereference_bh(ndev_ctx->nvdev);
struct hv_device *device = ndev_ctx->device_ctx;
@@ -847,8 +846,7 @@ int netvsc_send(struct net_device_context *ndev_ctx,
struct multi_send_data *msdp;
struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
struct sk_buff *msd_skb = NULL;
- bool try_batch;
- bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
+ bool try_batch, xmit_more;
/* If device is rescinded, return error and packet will get dropped. */
if (unlikely(!net_device || net_device->destroy))
@@ -899,10 +897,17 @@ int netvsc_send(struct net_device_context *ndev_ctx,
}
}
+ /* Keep aggregating only if stack says more data is coming
+ * and not doing mixed modes send and not flow blocked
+ */
+ xmit_more = skb->xmit_more &&
+ !packet->cp_partial &&
+ !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
+
if (section_index != NETVSC_INVALID_INDEX) {
netvsc_copy_to_send_buf(net_device,
section_index, msd_len,
- packet, rndis_msg, pb, skb);
+ packet, rndis_msg, pb, xmit_more);
packet->send_buf_index = section_index;
@@ -922,7 +927,7 @@ int netvsc_send(struct net_device_context *ndev_ctx,
if (msdp->skb)
dev_consume_skb_any(msdp->skb);
- if (xmit_more && !packet->cp_partial) {
+ if (xmit_more) {
msdp->skb = skb;
msdp->pkt = packet;
msdp->count++;
@@ -1085,7 +1090,7 @@ static int netvsc_receive(struct net_device *ndev,
u32 buflen = vmxferpage_packet->ranges[i].byte_count;
/* Pass it to the upper layer */
- status = rndis_filter_receive(ndev, net_device, device,
+ status = rndis_filter_receive(ndev, net_device,
channel, data, buflen);
}
@@ -1249,7 +1254,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
const struct netvsc_device_info *device_info)
{
int i, ret = 0;
- int ring_size = device_info->ring_size;
struct netvsc_device *net_device;
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
@@ -1261,8 +1265,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
net_device_ctx->tx_table[i] = 0;
- net_device->ring_size = ring_size;
-
/* Because the device uses NAPI, all the interrupt batching and
* control is done via Net softirq, not the channel handling
*/
@@ -1289,10 +1291,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
netvsc_poll, NAPI_POLL_WEIGHT);
/* Open the channel */
- ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
- ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb,
- net_device->chan_table);
+ ret = vmbus_open(device->channel, netvsc_ring_bytes,
+ netvsc_ring_bytes, NULL, 0,
+ netvsc_channel_cb, net_device->chan_table);
if (ret != 0) {
netif_napi_del(&net_device->chan_table[0].napi);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5129647d420c..c5584c2d440e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/netpoll.h>
+#include <linux/reciprocal_div.h>
#include <net/arp.h>
#include <net/route.h>
@@ -46,17 +47,15 @@
#include "hyperv_net.h"
#define RING_SIZE_MIN 64
-#define NETVSC_MIN_TX_SECTIONS 10
-#define NETVSC_DEFAULT_TX 192 /* ~1M */
-#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */
-#define NETVSC_DEFAULT_RX 10485 /* Max ~16M */
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
-static int ring_size = 128;
-module_param(ring_size, int, S_IRUGO);
+static unsigned int ring_size __ro_after_init = 128;
+module_param(ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+unsigned int netvsc_ring_bytes __ro_after_init;
+struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
@@ -174,17 +173,15 @@ out:
return ret;
}
-static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
- int pkt_type)
+static inline void *init_ppi_data(struct rndis_message *msg,
+ u32 ppi_size, u32 pkt_type)
{
- struct rndis_packet *rndis_pkt;
+ struct rndis_packet *rndis_pkt = &msg->msg.pkt;
struct rndis_per_packet_info *ppi;
- rndis_pkt = &msg->msg.pkt;
rndis_pkt->data_offset += ppi_size;
-
- ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
- rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);
+ ppi = (void *)rndis_pkt + rndis_pkt->per_pkt_info_offset
+ + rndis_pkt->per_pkt_info_len;
ppi->size = ppi_size;
ppi->type = pkt_type;
@@ -192,7 +189,7 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
rndis_pkt->per_pkt_info_len += ppi_size;
- return ppi;
+ return ppi + 1;
}
/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
@@ -469,10 +466,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
int ret;
unsigned int num_data_pgs;
struct rndis_message *rndis_msg;
- struct rndis_packet *rndis_pkt;
struct net_device *vf_netdev;
u32 rndis_msg_size;
- struct rndis_per_packet_info *ppi;
u32 hash;
struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
@@ -527,34 +522,36 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
rndis_msg = (struct rndis_message *)skb->head;
- memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
-
/* Add the rndis header */
rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
rndis_msg->msg_len = packet->total_data_buflen;
- rndis_pkt = &rndis_msg->msg.pkt;
- rndis_pkt->data_offset = sizeof(struct rndis_packet);
- rndis_pkt->data_len = packet->total_data_buflen;
- rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
+
+ rndis_msg->msg.pkt = (struct rndis_packet) {
+ .data_offset = sizeof(struct rndis_packet),
+ .data_len = packet->total_data_buflen,
+ .per_pkt_info_offset = sizeof(struct rndis_packet),
+ };
rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
hash = skb_get_hash_raw(skb);
if (hash != 0 && net->real_num_tx_queues > 1) {
+ u32 *hash_info;
+
rndis_msg_size += NDIS_HASH_PPI_SIZE;
- ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
- NBL_HASH_VALUE);
- *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
+ hash_info = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
+ NBL_HASH_VALUE);
+ *hash_info = hash;
}
if (skb_vlan_tag_present(skb)) {
struct ndis_pkt_8021q_info *vlan;
rndis_msg_size += NDIS_VLAN_PPI_SIZE;
- ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
- IEEE_8021Q_INFO);
+ vlan = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
+ IEEE_8021Q_INFO);
- vlan = (void *)ppi + ppi->ppi_offset;
+ vlan->value = 0;
vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
VLAN_PRIO_SHIFT;
@@ -564,11 +561,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
struct ndis_tcp_lso_info *lso_info;
rndis_msg_size += NDIS_LSO_PPI_SIZE;
- ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
- TCP_LARGESEND_PKTINFO);
-
- lso_info = (void *)ppi + ppi->ppi_offset;
+ lso_info = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
+ TCP_LARGESEND_PKTINFO);
+ lso_info->value = 0;
lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
if (skb->protocol == htons(ETH_P_IP)) {
lso_info->lso_v2_transmit.ip_version =
@@ -593,12 +589,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
struct ndis_tcp_ip_checksum_info *csum_info;
rndis_msg_size += NDIS_CSUM_PPI_SIZE;
- ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
- TCPIP_CHKSUM_PKTINFO);
-
- csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
- ppi->ppi_offset);
+ csum_info = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
+ TCPIP_CHKSUM_PKTINFO);
+ csum_info->value = 0;
csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) {
@@ -632,7 +626,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
/* timestamp packet in software */
skb_tx_timestamp(skb);
- ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
+ ret = netvsc_send(net, packet, rndis_msg, pb, skb);
if (likely(ret == 0))
return NETDEV_TX_OK;
@@ -658,22 +652,14 @@ no_memory:
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
-void netvsc_linkstatus_callback(struct hv_device *device_obj,
+void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp)
{
struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
- struct net_device *net;
- struct net_device_context *ndev_ctx;
+ struct net_device_context *ndev_ctx = netdev_priv(net);
struct netvsc_reconfig *event;
unsigned long flags;
- net = hv_get_drvdata(device_obj);
-
- if (!net)
- return;
-
- ndev_ctx = netdev_priv(net);
-
/* Update the physical link speed when changing to another vSwitch */
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
u32 speed;
@@ -753,34 +739,26 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
* "wire" on the specified device.
*/
int netvsc_recv_callback(struct net_device *net,
+ struct netvsc_device *net_device,
struct vmbus_channel *channel,
void *data, u32 len,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *net_device;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
- struct netvsc_channel *nvchan;
+ struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
struct sk_buff *skb;
struct netvsc_stats *rx_stats;
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
- rcu_read_lock();
- net_device = rcu_dereference(net_device_ctx->nvdev);
- if (unlikely(!net_device))
- goto drop;
-
- nvchan = &net_device->chan_table[q_idx];
-
/* Allocate a skb - TODO direct I/O to pages? */
skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
csum_info, vlan, data, len);
if (unlikely(!skb)) {
-drop:
- ++net->stats.rx_dropped;
+ ++net_device_ctx->eth_stats.rx_no_memory;
rcu_read_unlock();
return NVSP_STAT_FAIL;
}
@@ -804,8 +782,6 @@ drop:
u64_stats_update_end(&rx_stats->syncp);
napi_gro_receive(&nvchan->napi, skb);
- rcu_read_unlock();
-
return 0;
}
@@ -860,7 +836,6 @@ static int netvsc_set_channels(struct net_device *net,
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = count;
- device_info.ring_size = ring_size;
device_info.send_sections = nvdev->send_section_cnt;
device_info.send_section_size = nvdev->send_section_size;
device_info.recv_sections = nvdev->recv_section_cnt;
@@ -975,7 +950,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
rndis_filter_close(nvdev);
memset(&device_info, 0, sizeof(device_info));
- device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
device_info.send_sections = nvdev->send_section_cnt;
device_info.send_section_size = nvdev->send_section_size;
@@ -1133,12 +1107,13 @@ static const struct {
u16 offset;
} netvsc_stats[] = {
{ "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) },
- { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) },
+ { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) },
{ "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) },
{ "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) },
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
{ "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
{ "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
+ { "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
}, vf_stats[] = {
@@ -1539,7 +1514,6 @@ static int netvsc_set_ringparam(struct net_device *ndev,
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = nvdev->num_chn;
- device_info.ring_size = ring_size;
device_info.send_sections = new_tx;
device_info.send_section_size = nvdev->send_section_size;
device_info.recv_sections = new_rx;
@@ -1995,7 +1969,6 @@ static int netvsc_probe(struct hv_device *dev,
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
- device_info.ring_size = ring_size;
device_info.num_chn = VRSS_CHANNEL_DEFAULT;
device_info.send_sections = NETVSC_DEFAULT_TX;
device_info.send_section_size = NETVSC_SEND_SECTION_SIZE;
@@ -2158,11 +2131,13 @@ static int __init netvsc_drv_init(void)
if (ring_size < RING_SIZE_MIN) {
ring_size = RING_SIZE_MIN;
- pr_info("Increased ring_size to %d (min allowed)\n",
+ pr_info("Increased ring_size to %u (min allowed)\n",
ring_size);
}
- ret = vmbus_driver_register(&netvsc_drv);
+ netvsc_ring_bytes = ring_size * PAGE_SIZE;
+ netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes);
+ ret = vmbus_driver_register(&netvsc_drv);
if (ret)
return ret;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 7b637c7dd1e5..91a67c5297f7 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -134,11 +134,9 @@ static void put_rndis_request(struct rndis_device *dev,
kfree(req);
}
-static void dump_rndis_message(struct hv_device *hv_dev,
+static void dump_rndis_message(struct net_device *netdev,
const struct rndis_message *rndis_msg)
{
- struct net_device *netdev = hv_get_drvdata(hv_dev);
-
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
@@ -217,7 +215,6 @@ static int rndis_filter_send_request(struct rndis_device *dev,
struct hv_netvsc_packet *packet;
struct hv_page_buffer page_buf[2];
struct hv_page_buffer *pb = page_buf;
- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
int ret;
/* Setup the packet to send it */
@@ -245,7 +242,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
}
rcu_read_lock_bh();
- ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
+ ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
rcu_read_unlock_bh();
return ret;
@@ -354,6 +351,7 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
}
static int rndis_filter_receive_data(struct net_device *ndev,
+ struct netvsc_device *nvdev,
struct rndis_device *dev,
struct rndis_message *msg,
struct vmbus_channel *channel,
@@ -390,14 +388,14 @@ static int rndis_filter_receive_data(struct net_device *ndev,
*/
data = (void *)((unsigned long)data + data_offset);
csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
- return netvsc_recv_callback(ndev, channel,
+
+ return netvsc_recv_callback(ndev, nvdev, channel,
data, rndis_pkt->data_len,
csum_info, vlan);
}
int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev,
- struct hv_device *dev,
struct vmbus_channel *channel,
void *data, u32 buflen)
{
@@ -419,11 +417,12 @@ int rndis_filter_receive(struct net_device *ndev,
}
if (netif_msg_rx_status(net_device_ctx))
- dump_rndis_message(dev, rndis_msg);
+ dump_rndis_message(ndev, rndis_msg);
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
- return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg,
+ return rndis_filter_receive_data(ndev, net_dev,
+ rndis_dev, rndis_msg,
channel, data, buflen);
case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C:
@@ -434,7 +433,7 @@ int rndis_filter_receive(struct net_device *ndev,
case RNDIS_MSG_INDICATE:
/* notification msgs */
- netvsc_linkstatus_callback(dev, rndis_msg);
+ netvsc_linkstatus_callback(ndev, rndis_msg);
break;
default:
netdev_err(ndev,
@@ -1040,8 +1039,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
/* Set the channel before opening.*/
nvchan->channel = new_sc;
- ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
- nvscdev->ring_size * PAGE_SIZE, NULL, 0,
+ ret = vmbus_open(new_sc, netvsc_ring_bytes,
+ netvsc_ring_bytes, NULL, 0,
netvsc_channel_cb, nvchan);
if (ret == 0)
napi_enable(&nvchan->napi);
@@ -1362,9 +1361,6 @@ int rndis_filter_open(struct netvsc_device *nvdev)
if (!nvdev)
return -EINVAL;
- if (atomic_inc_return(&nvdev->open_cnt) != 1)
- return 0;
-
return rndis_filter_open_device(nvdev->extension);
}
@@ -1373,13 +1369,12 @@ int rndis_filter_close(struct netvsc_device *nvdev)
if (!nvdev)
return -EINVAL;
- if (atomic_dec_return(&nvdev->open_cnt) != 0)
- return 0;
-
return rndis_filter_close_device(nvdev->extension);
}
bool rndis_filter_opened(const struct netvsc_device *nvdev)
{
- return atomic_read(&nvdev->open_cnt) > 0;
+ const struct rndis_device *dev = nvdev->extension;
+
+ return dev->state == RNDIS_DEV_DATAINITIALIZED;
}
diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
index 400fdbd3a120..64f1b1e77bc0 100644
--- a/drivers/net/ieee802154/adf7242.c
+++ b/drivers/net/ieee802154/adf7242.c
@@ -1,7 +1,7 @@
/*
* Analog Devices ADF7242 Low-Power IEEE 802.15.4 Transceiver
*
- * Copyright 2009-2015 Analog Devices Inc.
+ * Copyright 2009-2017 Analog Devices Inc.
*
* Licensed under the GPL-2 or later.
*
@@ -344,12 +344,18 @@ static int adf7242_wait_status(struct adf7242_local *lp, unsigned int status,
return ret;
}
-static int adf7242_wait_ready(struct adf7242_local *lp, int line)
+static int adf7242_wait_rc_ready(struct adf7242_local *lp, int line)
{
return adf7242_wait_status(lp, STAT_RC_READY | STAT_SPI_READY,
STAT_RC_READY | STAT_SPI_READY, line);
}
+static int adf7242_wait_spi_ready(struct adf7242_local *lp, int line)
+{
+ return adf7242_wait_status(lp, STAT_SPI_READY,
+ STAT_SPI_READY, line);
+}
+
static int adf7242_write_fbuf(struct adf7242_local *lp, u8 *data, u8 len)
{
u8 *buf = lp->buf;
@@ -369,7 +375,7 @@ static int adf7242_write_fbuf(struct adf7242_local *lp, u8 *data, u8 len)
spi_message_add_tail(&xfer_head, &msg);
spi_message_add_tail(&xfer_buf, &msg);
- adf7242_wait_ready(lp, __LINE__);
+ adf7242_wait_spi_ready(lp, __LINE__);
mutex_lock(&lp->bmux);
buf[0] = CMD_SPI_PKT_WR;
@@ -401,7 +407,7 @@ static int adf7242_read_fbuf(struct adf7242_local *lp,
spi_message_add_tail(&xfer_head, &msg);
spi_message_add_tail(&xfer_buf, &msg);
- adf7242_wait_ready(lp, __LINE__);
+ adf7242_wait_spi_ready(lp, __LINE__);
mutex_lock(&lp->bmux);
if (packet_read) {
@@ -432,7 +438,7 @@ static int adf7242_read_reg(struct adf7242_local *lp, u16 addr, u8 *data)
.rx_buf = lp->buf_read_rx,
};
- adf7242_wait_ready(lp, __LINE__);
+ adf7242_wait_spi_ready(lp, __LINE__);
mutex_lock(&lp->bmux);
lp->buf_read_tx[0] = CMD_SPI_MEM_RD(addr);
@@ -462,7 +468,7 @@ static int adf7242_write_reg(struct adf7242_local *lp, u16 addr, u8 data)
{
int status;
- adf7242_wait_ready(lp, __LINE__);
+ adf7242_wait_spi_ready(lp, __LINE__);
mutex_lock(&lp->bmux);
lp->buf_reg_tx[0] = CMD_SPI_MEM_WR(addr);
@@ -484,7 +490,7 @@ static int adf7242_cmd(struct adf7242_local *lp, unsigned int cmd)
dev_vdbg(&lp->spi->dev, "%s : CMD=0x%X\n", __func__, cmd);
if (cmd != CMD_RC_PC_RESET_NO_WAIT)
- adf7242_wait_ready(lp, __LINE__);
+ adf7242_wait_rc_ready(lp, __LINE__);
mutex_lock(&lp->bmux);
lp->buf_cmd = cmd;
@@ -557,6 +563,22 @@ static int adf7242_verify_firmware(struct adf7242_local *lp,
return 0;
}
+static void adf7242_clear_irqstat(struct adf7242_local *lp)
+{
+ adf7242_write_reg(lp, REG_IRQ1_SRC1, IRQ_CCA_COMPLETE | IRQ_SFD_RX |
+ IRQ_SFD_TX | IRQ_RX_PKT_RCVD | IRQ_TX_PKT_SENT |
+ IRQ_FRAME_VALID | IRQ_ADDRESS_VALID | IRQ_CSMA_CA);
+}
+
+static int adf7242_cmd_rx(struct adf7242_local *lp)
+{
+ /* Wait until the ACK is sent */
+ adf7242_wait_status(lp, RC_STATUS_PHY_RDY, RC_STATUS_MASK, __LINE__);
+ adf7242_clear_irqstat(lp);
+
+ return adf7242_cmd(lp, CMD_RC_RX);
+}
+
static int adf7242_set_txpower(struct ieee802154_hw *hw, int mbm)
{
struct adf7242_local *lp = hw->priv;
@@ -660,7 +682,7 @@ static int adf7242_start(struct ieee802154_hw *hw)
struct adf7242_local *lp = hw->priv;
adf7242_cmd(lp, CMD_RC_PHY_RDY);
- adf7242_write_reg(lp, REG_IRQ1_SRC1, 0xFF);
+ adf7242_clear_irqstat(lp);
enable_irq(lp->spi->irq);
set_bit(FLAG_START, &lp->flags);
@@ -671,10 +693,10 @@ static void adf7242_stop(struct ieee802154_hw *hw)
{
struct adf7242_local *lp = hw->priv;
+ disable_irq(lp->spi->irq);
adf7242_cmd(lp, CMD_RC_IDLE);
clear_bit(FLAG_START, &lp->flags);
- disable_irq(lp->spi->irq);
- adf7242_write_reg(lp, REG_IRQ1_SRC1, 0xFF);
+ adf7242_clear_irqstat(lp);
}
static int adf7242_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
@@ -789,9 +811,12 @@ static int adf7242_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
struct adf7242_local *lp = hw->priv;
int ret;
+ /* ensure existing instances of the IRQ handler have completed */
+ disable_irq(lp->spi->irq);
set_bit(FLAG_XMIT, &lp->flags);
reinit_completion(&lp->tx_complete);
adf7242_cmd(lp, CMD_RC_PHY_RDY);
+ adf7242_clear_irqstat(lp);
ret = adf7242_write_fbuf(lp, skb->data, skb->len);
if (ret)
@@ -800,6 +825,7 @@ static int adf7242_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
ret = adf7242_cmd(lp, CMD_RC_CSMACA);
if (ret)
goto err;
+ enable_irq(lp->spi->irq);
ret = wait_for_completion_interruptible_timeout(&lp->tx_complete,
HZ / 10);
@@ -822,7 +848,7 @@ static int adf7242_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
err:
clear_bit(FLAG_XMIT, &lp->flags);
- adf7242_cmd(lp, CMD_RC_RX);
+ adf7242_cmd_rx(lp);
return ret;
}
@@ -846,7 +872,7 @@ static int adf7242_rx(struct adf7242_local *lp)
skb = dev_alloc_skb(len);
if (!skb) {
- adf7242_cmd(lp, CMD_RC_RX);
+ adf7242_cmd_rx(lp);
return -ENOMEM;
}
@@ -854,14 +880,14 @@ static int adf7242_rx(struct adf7242_local *lp)
ret = adf7242_read_fbuf(lp, data, len, true);
if (ret < 0) {
kfree_skb(skb);
- adf7242_cmd(lp, CMD_RC_RX);
+ adf7242_cmd_rx(lp);
return ret;
}
lqi = data[len - 2];
lp->rssi = data[len - 1];
- adf7242_cmd(lp, CMD_RC_RX);
+ ret = adf7242_cmd_rx(lp);
skb_trim(skb, len - 2); /* Don't put RSSI/LQI or CRC into the frame */
@@ -870,7 +896,7 @@ static int adf7242_rx(struct adf7242_local *lp)
dev_dbg(&lp->spi->dev, "%s: ret=%d len=%d lqi=%d rssi=%d\n",
__func__, ret, (int)len, (int)lqi, lp->rssi);
- return 0;
+ return ret;
}
static const struct ieee802154_ops adf7242_ops = {
@@ -888,7 +914,7 @@ static const struct ieee802154_ops adf7242_ops = {
.set_cca_ed_level = adf7242_set_cca_ed_level,
};
-static void adf7242_debug(u8 irq1)
+static void adf7242_debug(struct adf7242_local *lp, u8 irq1)
{
#ifdef DEBUG
u8 stat;
@@ -906,9 +932,12 @@ static void adf7242_debug(u8 irq1)
irq1 & IRQ_FRAME_VALID ? "IRQ_FRAME_VALID\n" : "",
irq1 & IRQ_ADDRESS_VALID ? "IRQ_ADDRESS_VALID\n" : "");
- dev_dbg(&lp->spi->dev, "%s STATUS = %X:\n%s\n%s%s%s%s%s\n",
+ dev_dbg(&lp->spi->dev, "%s STATUS = %X:\n%s\n%s\n%s\n%s\n%s%s%s%s%s\n",
__func__, stat,
+ stat & STAT_SPI_READY ? "SPI_READY" : "SPI_BUSY",
+ stat & STAT_IRQ_STATUS ? "IRQ_PENDING" : "IRQ_CLEAR",
stat & STAT_RC_READY ? "RC_READY" : "RC_BUSY",
+ stat & STAT_CCA_RESULT ? "CHAN_IDLE" : "CHAN_BUSY",
(stat & 0xf) == RC_STATUS_IDLE ? "RC_STATUS_IDLE" : "",
(stat & 0xf) == RC_STATUS_MEAS ? "RC_STATUS_MEAS" : "",
(stat & 0xf) == RC_STATUS_PHY_RDY ? "RC_STATUS_PHY_RDY" : "",
@@ -923,20 +952,20 @@ static irqreturn_t adf7242_isr(int irq, void *data)
unsigned int xmit;
u8 irq1;
- adf7242_wait_status(lp, RC_STATUS_PHY_RDY, RC_STATUS_MASK, __LINE__);
-
adf7242_read_reg(lp, REG_IRQ1_SRC1, &irq1);
- adf7242_write_reg(lp, REG_IRQ1_SRC1, irq1);
if (!(irq1 & (IRQ_RX_PKT_RCVD | IRQ_CSMA_CA)))
dev_err(&lp->spi->dev, "%s :ERROR IRQ1 = 0x%X\n",
__func__, irq1);
- adf7242_debug(irq1);
+ adf7242_debug(lp, irq1);
xmit = test_bit(FLAG_XMIT, &lp->flags);
if (xmit && (irq1 & IRQ_CSMA_CA)) {
+ adf7242_wait_status(lp, RC_STATUS_PHY_RDY,
+ RC_STATUS_MASK, __LINE__);
+
if (ADF7242_REPORT_CSMA_CA_STAT) {
u8 astat;
@@ -957,6 +986,7 @@ static irqreturn_t adf7242_isr(int irq, void *data)
lp->tx_stat = SUCCESS;
}
complete(&lp->tx_complete);
+ adf7242_clear_irqstat(lp);
} else if (!xmit && (irq1 & IRQ_RX_PKT_RCVD) &&
(irq1 & IRQ_FRAME_VALID)) {
adf7242_rx(lp);
@@ -965,16 +995,19 @@ static irqreturn_t adf7242_isr(int irq, void *data)
dev_dbg(&lp->spi->dev, "%s:%d : ERROR IRQ1 = 0x%X\n",
__func__, __LINE__, irq1);
adf7242_cmd(lp, CMD_RC_PHY_RDY);
- adf7242_write_reg(lp, REG_IRQ1_SRC1, 0xFF);
- adf7242_cmd(lp, CMD_RC_RX);
+ adf7242_cmd_rx(lp);
} else {
/* This can only be xmit without IRQ, likely a RX packet.
* we get an TX IRQ shortly - do nothing or let the xmit
* timeout handle this
*/
+
dev_dbg(&lp->spi->dev, "%s:%d : ERROR IRQ1 = 0x%X, xmit %d\n",
__func__, __LINE__, irq1, xmit);
+ adf7242_wait_status(lp, RC_STATUS_PHY_RDY,
+ RC_STATUS_MASK, __LINE__);
complete(&lp->tx_complete);
+ adf7242_clear_irqstat(lp);
}
return IRQ_HANDLED;
@@ -994,7 +1027,7 @@ static int adf7242_soft_reset(struct adf7242_local *lp, int line)
adf7242_set_promiscuous_mode(lp->hw, lp->promiscuous);
adf7242_set_csma_params(lp->hw, lp->min_be, lp->max_be,
lp->max_cca_retries);
- adf7242_write_reg(lp, REG_IRQ1_SRC1, 0xFF);
+ adf7242_clear_irqstat(lp);
if (test_bit(FLAG_START, &lp->flags)) {
enable_irq(lp->spi->irq);
@@ -1060,7 +1093,7 @@ static int adf7242_hw_init(struct adf7242_local *lp)
adf7242_write_reg(lp, REG_IRQ1_EN0, 0);
adf7242_write_reg(lp, REG_IRQ1_EN1, IRQ_RX_PKT_RCVD | IRQ_CSMA_CA);
- adf7242_write_reg(lp, REG_IRQ1_SRC1, 0xFF);
+ adf7242_clear_irqstat(lp);
adf7242_write_reg(lp, REG_IRQ1_SRC0, 0xFF);
adf7242_cmd(lp, CMD_RC_IDLE);
@@ -1086,8 +1119,11 @@ static int adf7242_stats_show(struct seq_file *file, void *offset)
irq1 & IRQ_FRAME_VALID ? "IRQ_FRAME_VALID\n" : "",
irq1 & IRQ_ADDRESS_VALID ? "IRQ_ADDRESS_VALID\n" : "");
- seq_printf(file, "STATUS = %X:\n%s\n%s%s%s%s%s\n", stat,
+ seq_printf(file, "STATUS = %X:\n%s\n%s\n%s\n%s\n%s%s%s%s%s\n", stat,
+ stat & STAT_SPI_READY ? "SPI_READY" : "SPI_BUSY",
+ stat & STAT_IRQ_STATUS ? "IRQ_PENDING" : "IRQ_CLEAR",
stat & STAT_RC_READY ? "RC_READY" : "RC_BUSY",
+ stat & STAT_CCA_RESULT ? "CHAN_IDLE" : "CHAN_BUSY",
(stat & 0xf) == RC_STATUS_IDLE ? "RC_STATUS_IDLE" : "",
(stat & 0xf) == RC_STATUS_MEAS ? "RC_STATUS_MEAS" : "",
(stat & 0xf) == RC_STATUS_PHY_RDY ? "RC_STATUS_PHY_RDY" : "",
@@ -1257,12 +1293,14 @@ static int adf7242_remove(struct spi_device *spi)
static const struct of_device_id adf7242_of_match[] = {
{ .compatible = "adi,adf7242", },
+ { .compatible = "adi,adf7241", },
{ },
};
MODULE_DEVICE_TABLE(of, adf7242_of_match);
static const struct spi_device_id adf7242_device_id[] = {
{ .name = "adf7242", },
+ { .name = "adf7241", },
{ },
};
MODULE_DEVICE_TABLE(spi, adf7242_device_id);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 77cc4fbaeace..c1f008fe4e1d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -315,13 +315,13 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
*pskb = skb;
}
- ipvlan_skb_crossing_ns(skb, dev);
if (local) {
skb->pkt_type = PACKET_HOST;
if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
success = true;
} else {
+ skb->dev = dev;
ret = RX_HANDLER_ANOTHER;
success = true;
}
@@ -586,7 +586,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
return NET_XMIT_SUCCESS;
}
- ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
+ skb->dev = ipvlan->phy_dev;
return dev_queue_xmit(skb);
}
@@ -664,8 +664,6 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
struct sk_buff *skb = *pskb;
struct ethhdr *eth = eth_hdr(skb);
rx_handler_result_t ret = RX_HANDLER_PASS;
- void *lyr3h;
- int addr_type;
if (is_multicast_ether_addr(eth->h_dest)) {
if (ipvlan_external_frame(skb, port)) {
@@ -683,15 +681,8 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
}
}
} else {
- struct ipvl_addr *addr;
-
- lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
- if (!lyr3h)
- return ret;
-
- addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
- if (addr)
- ret = ipvlan_rcv_frame(addr, pskb, false);
+ /* Perform like l3 mode for non-multicast packet */
+ ret = ipvlan_handle_mode_l3(pskb, port);
}
return ret;
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 30cb803e2fe5..2469df118fbf 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -850,6 +850,19 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
return ipvlan_del_addr(ipvlan, ip6_addr, true);
}
+static bool ipvlan_is_valid_dev(const struct net_device *dev)
+{
+ struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+ if (!netif_is_ipvlan(dev))
+ return false;
+
+ if (!ipvlan || !ipvlan->port)
+ return false;
+
+ return true;
+}
+
static int ipvlan_addr6_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -857,10 +870,7 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)if6->idev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- if (!netif_is_ipvlan(dev))
- return NOTIFY_DONE;
-
- if (!ipvlan || !ipvlan->port)
+ if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
switch (event) {
@@ -888,10 +898,7 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
if (in_softirq())
return NOTIFY_DONE;
- if (!netif_is_ipvlan(dev))
- return NOTIFY_DONE;
-
- if (!ipvlan || !ipvlan->port)
+ if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
switch (event) {
@@ -932,10 +939,7 @@ static int ipvlan_addr4_event(struct notifier_block *unused,
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct in_addr ip4_addr;
- if (!netif_is_ipvlan(dev))
- return NOTIFY_DONE;
-
- if (!ipvlan || !ipvlan->port)
+ if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
switch (event) {
@@ -961,10 +965,7 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- if (!netif_is_ipvlan(dev))
- return NOTIFY_DONE;
-
- if (!ipvlan || !ipvlan->port)
+ if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
switch (event) {
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..074ddebbc41d
--- /dev/null
+++ b/drivers/net/netdevsim/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_NETDEVSIM) += netdevsim.o
+
+netdevsim-objs := \
+ netdev.o \
+ bpf.o \
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
new file mode 100644
index 000000000000..078d2c37a6c1
--- /dev/null
+++ b/drivers/net/netdevsim/bpf.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/rtnetlink.h>
+#include <net/pkt_cls.h>
+
+#include "netdevsim.h"
+
+struct nsim_bpf_bound_prog {
+ struct netdevsim *ns;
+ struct bpf_prog *prog;
+ struct dentry *ddir;
+ const char *state;
+ bool is_loaded;
+ struct list_head l;
+};
+
+static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data)
+{
+ const char **str = file->private;
+
+ if (*str)
+ seq_printf(file, "%s\n", *str);
+
+ return 0;
+}
+
+static int nsim_debugfs_bpf_string_open(struct inode *inode, struct file *f)
+{
+ return single_open(f, nsim_debugfs_bpf_string_read, inode->i_private);
+}
+
+static const struct file_operations nsim_bpf_string_fops = {
+ .owner = THIS_MODULE,
+ .open = nsim_debugfs_bpf_string_open,
+ .release = single_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+static int
+nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
+{
+ struct nsim_bpf_bound_prog *state;
+
+ state = env->prog->aux->offload->dev_priv;
+ if (state->ns->bpf_bind_verifier_delay && !insn_idx)
+ msleep(state->ns->bpf_bind_verifier_delay);
+
+ return 0;
+}
+
+static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = {
+ .insn_hook = nsim_bpf_verify_insn,
+};
+
+static bool nsim_xdp_offload_active(struct netdevsim *ns)
+{
+ return ns->xdp_prog_mode == XDP_ATTACHED_HW;
+}
+
+static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded)
+{
+ struct nsim_bpf_bound_prog *state;
+
+ if (!prog || !prog->aux->offload)
+ return;
+
+ state = prog->aux->offload->dev_priv;
+ state->is_loaded = loaded;
+}
+
+static int
+nsim_bpf_offload(struct netdevsim *ns, struct bpf_prog *prog, bool oldprog)
+{
+ nsim_prog_set_loaded(ns->bpf_offloaded, false);
+
+ WARN(!!ns->bpf_offloaded != oldprog,
+ "bad offload state, expected offload %sto be active",
+ oldprog ? "" : "not ");
+ ns->bpf_offloaded = prog;
+ ns->bpf_offloaded_id = prog ? prog->aux->id : 0;
+ nsim_prog_set_loaded(prog, true);
+
+ return 0;
+}
+
+int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ struct tc_cls_bpf_offload *cls_bpf = type_data;
+ struct bpf_prog *prog = cls_bpf->prog;
+ struct netdevsim *ns = cb_priv;
+ bool skip_sw;
+
+ if (type != TC_SETUP_CLSBPF ||
+ !tc_can_offload(ns->netdev) ||
+ cls_bpf->common.protocol != htons(ETH_P_ALL) ||
+ cls_bpf->common.chain_index)
+ return -EOPNOTSUPP;
+
+ skip_sw = cls_bpf->gen_flags & TCA_CLS_FLAGS_SKIP_SW;
+
+ if (nsim_xdp_offload_active(ns))
+ return -EBUSY;
+
+ if (!ns->bpf_tc_accept)
+ return -EOPNOTSUPP;
+ /* Note: progs without skip_sw will probably not be dev bound */
+ if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept)
+ return -EOPNOTSUPP;
+
+ switch (cls_bpf->command) {
+ case TC_CLSBPF_REPLACE:
+ return nsim_bpf_offload(ns, prog, true);
+ case TC_CLSBPF_ADD:
+ return nsim_bpf_offload(ns, prog, false);
+ case TC_CLSBPF_DESTROY:
+ return nsim_bpf_offload(ns, NULL, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int nsim_bpf_disable_tc(struct netdevsim *ns)
+{
+ if (ns->bpf_offloaded && !nsim_xdp_offload_active(ns))
+ return -EBUSY;
+ return 0;
+}
+
+static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
+{
+ if (!nsim_xdp_offload_active(ns) && !bpf->prog)
+ return 0;
+ if (!nsim_xdp_offload_active(ns) && bpf->prog && ns->bpf_offloaded) {
+ NSIM_EA(bpf->extack, "TC program is already loaded");
+ return -EBUSY;
+ }
+
+ return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns));
+}
+
+static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
+{
+ int err;
+
+ if (ns->xdp_prog && (bpf->flags ^ ns->xdp_flags) & XDP_FLAGS_MODES) {
+ NSIM_EA(bpf->extack, "program loaded with different flags");
+ return -EBUSY;
+ }
+
+ if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) {
+ NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS");
+ return -EOPNOTSUPP;
+ }
+ if (bpf->command == XDP_SETUP_PROG_HW && !ns->bpf_xdpoffload_accept) {
+ NSIM_EA(bpf->extack, "XDP offload disabled in DebugFS");
+ return -EOPNOTSUPP;
+ }
+
+ if (bpf->command == XDP_SETUP_PROG_HW) {
+ err = nsim_xdp_offload_prog(ns, bpf);
+ if (err)
+ return err;
+ }
+
+ if (ns->xdp_prog)
+ bpf_prog_put(ns->xdp_prog);
+
+ ns->xdp_prog = bpf->prog;
+ ns->xdp_flags = bpf->flags;
+
+ if (!bpf->prog)
+ ns->xdp_prog_mode = XDP_ATTACHED_NONE;
+ else if (bpf->command == XDP_SETUP_PROG)
+ ns->xdp_prog_mode = XDP_ATTACHED_DRV;
+ else
+ ns->xdp_prog_mode = XDP_ATTACHED_HW;
+
+ return 0;
+}
+
+static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
+{
+ struct nsim_bpf_bound_prog *state;
+ char name[16];
+ int err;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ state->ns = ns;
+ state->prog = prog;
+ state->state = "verify";
+
+ /* Program id is not populated yet when we create the state. */
+ sprintf(name, "%u", ns->prog_id_gen++);
+ state->ddir = debugfs_create_dir(name, ns->ddir_bpf_bound_progs);
+ if (IS_ERR(state->ddir)) {
+ err = PTR_ERR(state->ddir);
+ kfree(state);
+ return err;
+ }
+
+ debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
+ debugfs_create_file("state", 0400, state->ddir,
+ &state->state, &nsim_bpf_string_fops);
+ debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
+
+ list_add_tail(&state->l, &ns->bpf_bound_progs);
+
+ prog->aux->offload->dev_priv = state;
+
+ return 0;
+}
+
+static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
+{
+ struct nsim_bpf_bound_prog *state;
+
+ state = prog->aux->offload->dev_priv;
+ WARN(state->is_loaded,
+ "offload state destroyed while program still bound");
+ debugfs_remove_recursive(state->ddir);
+ list_del(&state->l);
+ kfree(state);
+}
+
+static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
+{
+ if (bpf->prog && bpf->prog->aux->offload) {
+ NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv");
+ return -EINVAL;
+ }
+ if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) {
+ NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled");
+ return -EINVAL;
+ }
+ if (nsim_xdp_offload_active(ns)) {
+ NSIM_EA(bpf->extack, "xdp offload active, can't load drv prog");
+ return -EBUSY;
+ }
+ return 0;
+}
+
+static int
+nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
+{
+ struct nsim_bpf_bound_prog *state;
+
+ if (!bpf->prog)
+ return 0;
+
+ if (!bpf->prog->aux->offload) {
+ NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
+ return -EINVAL;
+ }
+ if (bpf->prog->aux->offload->netdev != ns->netdev) {
+ NSIM_EA(bpf->extack, "program bound to different dev");
+ return -EINVAL;
+ }
+
+ state = bpf->prog->aux->offload->dev_priv;
+ if (WARN_ON(strcmp(state->state, "xlated"))) {
+ NSIM_EA(bpf->extack, "offloading program in bad state");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+ struct nsim_bpf_bound_prog *state;
+ int err;
+
+ ASSERT_RTNL();
+
+ switch (bpf->command) {
+ case BPF_OFFLOAD_VERIFIER_PREP:
+ if (!ns->bpf_bind_accept)
+ return -EOPNOTSUPP;
+
+ err = nsim_bpf_create_prog(ns, bpf->verifier.prog);
+ if (err)
+ return err;
+
+ bpf->verifier.ops = &nsim_bpf_analyzer_ops;
+ return 0;
+ case BPF_OFFLOAD_TRANSLATE:
+ state = bpf->offload.prog->aux->offload->dev_priv;
+
+ state->state = "xlated";
+ return 0;
+ case BPF_OFFLOAD_DESTROY:
+ nsim_bpf_destroy_prog(bpf->offload.prog);
+ return 0;
+ case XDP_QUERY_PROG:
+ bpf->prog_attached = ns->xdp_prog_mode;
+ bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0;
+ bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0;
+ return 0;
+ case XDP_SETUP_PROG:
+ err = nsim_setup_prog_checks(ns, bpf);
+ if (err)
+ return err;
+
+ return nsim_xdp_set_prog(ns, bpf);
+ case XDP_SETUP_PROG_HW:
+ err = nsim_setup_prog_hw_checks(ns, bpf);
+ if (err)
+ return err;
+
+ return nsim_xdp_set_prog(ns, bpf);
+ default:
+ return -EINVAL;
+ }
+}
+
+int nsim_bpf_init(struct netdevsim *ns)
+{
+ INIT_LIST_HEAD(&ns->bpf_bound_progs);
+
+ debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
+ &ns->bpf_offloaded_id);
+
+ ns->bpf_bind_accept = true;
+ debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir,
+ &ns->bpf_bind_accept);
+ debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir,
+ &ns->bpf_bind_verifier_delay);
+ ns->ddir_bpf_bound_progs =
+ debugfs_create_dir("bpf_bound_progs", ns->ddir);
+
+ ns->bpf_tc_accept = true;
+ debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
+ &ns->bpf_tc_accept);
+ debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ns->ddir,
+ &ns->bpf_tc_non_bound_accept);
+ ns->bpf_xdpdrv_accept = true;
+ debugfs_create_bool("bpf_xdpdrv_accept", 0600, ns->ddir,
+ &ns->bpf_xdpdrv_accept);
+ ns->bpf_xdpoffload_accept = true;
+ debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
+ &ns->bpf_xdpoffload_accept);
+
+ return 0;
+}
+
+void nsim_bpf_uninit(struct netdevsim *ns)
+{
+ WARN_ON(!list_empty(&ns->bpf_bound_progs));
+ WARN_ON(ns->xdp_prog);
+ WARN_ON(ns->bpf_offloaded);
+}
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
new file mode 100644
index 000000000000..eb8c679fca9f
--- /dev/null
+++ b/drivers/net/netdevsim/netdev.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/rtnetlink.h>
+
+#include "netdevsim.h"
+
+struct nsim_vf_config {
+ int link_state;
+ u16 min_tx_rate;
+ u16 max_tx_rate;
+ u16 vlan;
+ __be16 vlan_proto;
+ u16 qos;
+ u8 vf_mac[ETH_ALEN];
+ bool spoofchk_enabled;
+ bool trusted;
+ bool rss_query_enabled;
+};
+
+static u32 nsim_dev_id;
+
+static int nsim_num_vf(struct device *dev)
+{
+ struct netdevsim *ns = to_nsim(dev);
+
+ return ns->num_vfs;
+}
+
+static struct bus_type nsim_bus = {
+ .name = DRV_NAME,
+ .dev_name = DRV_NAME,
+ .num_vf = nsim_num_vf,
+};
+
+static int nsim_vfs_enable(struct netdevsim *ns, unsigned int num_vfs)
+{
+ ns->vfconfigs = kcalloc(num_vfs, sizeof(struct nsim_vf_config),
+ GFP_KERNEL);
+ if (!ns->vfconfigs)
+ return -ENOMEM;
+ ns->num_vfs = num_vfs;
+
+ return 0;
+}
+
+static void nsim_vfs_disable(struct netdevsim *ns)
+{
+ kfree(ns->vfconfigs);
+ ns->vfconfigs = NULL;
+ ns->num_vfs = 0;
+}
+
+static ssize_t
+nsim_numvfs_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct netdevsim *ns = to_nsim(dev);
+ unsigned int num_vfs;
+ int ret;
+
+ ret = kstrtouint(buf, 0, &num_vfs);
+ if (ret)
+ return ret;
+
+ rtnl_lock();
+ if (ns->num_vfs == num_vfs)
+ goto exit_good;
+ if (ns->num_vfs && num_vfs) {
+ ret = -EBUSY;
+ goto exit_unlock;
+ }
+
+ if (num_vfs) {
+ ret = nsim_vfs_enable(ns, num_vfs);
+ if (ret)
+ goto exit_unlock;
+ } else {
+ nsim_vfs_disable(ns);
+ }
+exit_good:
+ ret = count;
+exit_unlock:
+ rtnl_unlock();
+
+ return ret;
+}
+
+static ssize_t
+nsim_numvfs_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct netdevsim *ns = to_nsim(dev);
+
+ return sprintf(buf, "%u\n", ns->num_vfs);
+}
+
+static struct device_attribute nsim_numvfs_attr =
+ __ATTR(sriov_numvfs, 0664, nsim_numvfs_show, nsim_numvfs_store);
+
+static struct attribute *nsim_dev_attrs[] = {
+ &nsim_numvfs_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group nsim_dev_attr_group = {
+ .attrs = nsim_dev_attrs,
+};
+
+static const struct attribute_group *nsim_dev_attr_groups[] = {
+ &nsim_dev_attr_group,
+ NULL,
+};
+
+static void nsim_dev_release(struct device *dev)
+{
+ struct netdevsim *ns = to_nsim(dev);
+
+ nsim_vfs_disable(ns);
+ free_netdev(ns->netdev);
+}
+
+struct device_type nsim_dev_type = {
+ .groups = nsim_dev_attr_groups,
+ .release = nsim_dev_release,
+};
+
+static int nsim_init(struct net_device *dev)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+ int err;
+
+ ns->netdev = dev;
+ ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
+
+ err = nsim_bpf_init(ns);
+ if (err)
+ goto err_debugfs_destroy;
+
+ ns->dev.id = nsim_dev_id++;
+ ns->dev.bus = &nsim_bus;
+ ns->dev.type = &nsim_dev_type;
+ err = device_register(&ns->dev);
+ if (err)
+ goto err_bpf_uninit;
+
+ SET_NETDEV_DEV(dev, &ns->dev);
+
+ return 0;
+
+err_bpf_uninit:
+ nsim_bpf_uninit(ns);
+err_debugfs_destroy:
+ debugfs_remove_recursive(ns->ddir);
+ return err;
+}
+
+static void nsim_uninit(struct net_device *dev)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ debugfs_remove_recursive(ns->ddir);
+ nsim_bpf_uninit(ns);
+}
+
+static void nsim_free(struct net_device *dev)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ device_unregister(&ns->dev);
+ /* netdev and vf state will be freed out of device_release() */
+}
+
+static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ u64_stats_update_begin(&ns->syncp);
+ ns->tx_packets++;
+ ns->tx_bytes += skb->len;
+ u64_stats_update_end(&ns->syncp);
+
+ dev_kfree_skb(skb);
+
+ return NETDEV_TX_OK;
+}
+
+static void nsim_set_rx_mode(struct net_device *dev)
+{
+}
+
+static int nsim_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (ns->xdp_prog_mode == XDP_ATTACHED_DRV &&
+ new_mtu > NSIM_XDP_MAX_MTU)
+ return -EBUSY;
+
+ dev->mtu = new_mtu;
+
+ return 0;
+}
+
+static void
+nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&ns->syncp);
+ stats->tx_bytes = ns->tx_bytes;
+ stats->tx_packets = ns->tx_packets;
+ } while (u64_stats_fetch_retry(&ns->syncp, start));
+}
+
+static int
+nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
+}
+
+static int
+nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, nsim_setup_tc_block_cb,
+ ns, ns);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, nsim_setup_tc_block_cb, ns);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ /* Only refuse multicast addresses, zero address can mean unset/any. */
+ if (vf >= ns->num_vfs || is_multicast_ether_addr(mac))
+ return -EINVAL;
+ memcpy(ns->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
+
+ return 0;
+}
+
+static int nsim_set_vf_vlan(struct net_device *dev, int vf,
+ u16 vlan, u8 qos, __be16 vlan_proto)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs || vlan > 4095 || qos > 7)
+ return -EINVAL;
+
+ ns->vfconfigs[vf].vlan = vlan;
+ ns->vfconfigs[vf].qos = qos;
+ ns->vfconfigs[vf].vlan_proto = vlan_proto;
+
+ return 0;
+}
+
+static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs)
+ return -EINVAL;
+
+ ns->vfconfigs[vf].min_tx_rate = min;
+ ns->vfconfigs[vf].max_tx_rate = max;
+
+ return 0;
+}
+
+static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs)
+ return -EINVAL;
+ ns->vfconfigs[vf].spoofchk_enabled = val;
+
+ return 0;
+}
+
+static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs)
+ return -EINVAL;
+ ns->vfconfigs[vf].rss_query_enabled = val;
+
+ return 0;
+}
+
+static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs)
+ return -EINVAL;
+ ns->vfconfigs[vf].trusted = val;
+
+ return 0;
+}
+
+static int
+nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs)
+ return -EINVAL;
+
+ ivi->vf = vf;
+ ivi->linkstate = ns->vfconfigs[vf].link_state;
+ ivi->min_tx_rate = ns->vfconfigs[vf].min_tx_rate;
+ ivi->max_tx_rate = ns->vfconfigs[vf].max_tx_rate;
+ ivi->vlan = ns->vfconfigs[vf].vlan;
+ ivi->vlan_proto = ns->vfconfigs[vf].vlan_proto;
+ ivi->qos = ns->vfconfigs[vf].qos;
+ memcpy(&ivi->mac, ns->vfconfigs[vf].vf_mac, ETH_ALEN);
+ ivi->spoofchk = ns->vfconfigs[vf].spoofchk_enabled;
+ ivi->trusted = ns->vfconfigs[vf].trusted;
+ ivi->rss_query_en = ns->vfconfigs[vf].rss_query_enabled;
+
+ return 0;
+}
+
+static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (vf >= ns->num_vfs)
+ return -EINVAL;
+
+ switch (state) {
+ case IFLA_VF_LINK_STATE_AUTO:
+ case IFLA_VF_LINK_STATE_ENABLE:
+ case IFLA_VF_LINK_STATE_DISABLE:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ns->vfconfigs[vf].link_state = state;
+
+ return 0;
+}
+
+static int
+nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return nsim_setup_tc_block(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+nsim_set_features(struct net_device *dev, netdev_features_t features)
+{
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
+ return nsim_bpf_disable_tc(ns);
+
+ return 0;
+}
+
+static const struct net_device_ops nsim_netdev_ops = {
+ .ndo_init = nsim_init,
+ .ndo_uninit = nsim_uninit,
+ .ndo_start_xmit = nsim_start_xmit,
+ .ndo_set_rx_mode = nsim_set_rx_mode,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = nsim_change_mtu,
+ .ndo_get_stats64 = nsim_get_stats64,
+ .ndo_set_vf_mac = nsim_set_vf_mac,
+ .ndo_set_vf_vlan = nsim_set_vf_vlan,
+ .ndo_set_vf_rate = nsim_set_vf_rate,
+ .ndo_set_vf_spoofchk = nsim_set_vf_spoofchk,
+ .ndo_set_vf_trust = nsim_set_vf_trust,
+ .ndo_get_vf_config = nsim_get_vf_config,
+ .ndo_set_vf_link_state = nsim_set_vf_link_state,
+ .ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
+ .ndo_setup_tc = nsim_setup_tc,
+ .ndo_set_features = nsim_set_features,
+ .ndo_bpf = nsim_bpf,
+};
+
+static void nsim_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+ eth_hw_addr_random(dev);
+
+ dev->netdev_ops = &nsim_netdev_ops;
+ dev->priv_destructor = nsim_free;
+
+ dev->tx_queue_len = 0;
+ dev->flags |= IFF_NOARP;
+ dev->flags &= ~IFF_MULTICAST;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
+ IFF_NO_QUEUE;
+ dev->features |= NETIF_F_HIGHDMA |
+ NETIF_F_SG |
+ NETIF_F_FRAGLIST |
+ NETIF_F_HW_CSUM |
+ NETIF_F_TSO;
+ dev->hw_features |= NETIF_F_HW_TC;
+ dev->max_mtu = ETH_MAX_MTU;
+}
+
+static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ if (tb[IFLA_ADDRESS]) {
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ return -EINVAL;
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
+static struct rtnl_link_ops nsim_link_ops __read_mostly = {
+ .kind = DRV_NAME,
+ .priv_size = sizeof(struct netdevsim),
+ .setup = nsim_setup,
+ .validate = nsim_validate,
+};
+
+struct dentry *nsim_ddir;
+
+static int __init nsim_module_init(void)
+{
+ int err;
+
+ nsim_ddir = debugfs_create_dir(DRV_NAME, NULL);
+ if (IS_ERR(nsim_ddir))
+ return PTR_ERR(nsim_ddir);
+
+ err = bus_register(&nsim_bus);
+ if (err)
+ goto err_debugfs_destroy;
+
+ err = rtnl_link_register(&nsim_link_ops);
+ if (err)
+ goto err_unreg_bus;
+
+ return 0;
+
+err_unreg_bus:
+ bus_unregister(&nsim_bus);
+err_debugfs_destroy:
+ debugfs_remove_recursive(nsim_ddir);
+ return err;
+}
+
+static void __exit nsim_module_exit(void)
+{
+ rtnl_link_unregister(&nsim_link_ops);
+ bus_unregister(&nsim_bus);
+ debugfs_remove_recursive(nsim_ddir);
+}
+
+module_init(nsim_module_init);
+module_exit(nsim_module_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
new file mode 100644
index 000000000000..32270de9395a
--- /dev/null
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/u64_stats_sync.h>
+
+#define DRV_NAME "netdevsim"
+
+#define NSIM_XDP_MAX_MTU 4000
+
+#define NSIM_EA(extack, msg) NL_SET_ERR_MSG_MOD((extack), msg)
+
+struct bpf_prog;
+struct dentry;
+struct nsim_vf_config;
+
+struct netdevsim {
+ struct net_device *netdev;
+
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+
+ struct device dev;
+
+ struct dentry *ddir;
+
+ unsigned int num_vfs;
+ struct nsim_vf_config *vfconfigs;
+
+ struct bpf_prog *bpf_offloaded;
+ u32 bpf_offloaded_id;
+
+ u32 xdp_flags;
+ int xdp_prog_mode;
+ struct bpf_prog *xdp_prog;
+
+ u32 prog_id_gen;
+
+ bool bpf_bind_accept;
+ u32 bpf_bind_verifier_delay;
+ struct dentry *ddir_bpf_bound_progs;
+ struct list_head bpf_bound_progs;
+
+ bool bpf_tc_accept;
+ bool bpf_tc_non_bound_accept;
+ bool bpf_xdpdrv_accept;
+ bool bpf_xdpoffload_accept;
+};
+
+extern struct dentry *nsim_ddir;
+
+int nsim_bpf_init(struct netdevsim *ns);
+void nsim_bpf_uninit(struct netdevsim *ns);
+int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf);
+int nsim_bpf_disable_tc(struct netdevsim *ns);
+int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data, void *cb_priv);
+
+static inline struct netdevsim *to_nsim(struct device *ptr)
+{
+ return container_of(ptr, struct netdevsim, dev);
+}
diff --git a/drivers/net/phy/amd.c b/drivers/net/phy/amd.c
index 18141c022b13..6fe5dc9201d0 100644
--- a/drivers/net/phy/amd.c
+++ b/drivers/net/phy/amd.c
@@ -68,8 +68,6 @@ static struct phy_driver am79c_driver[] = { {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = am79c_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = am79c_ack_interrupt,
.config_intr = am79c_config_intr,
} };
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index e911e4990b20..1e190f3bca63 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -71,7 +71,6 @@ MODULE_LICENSE("GPL");
struct at803x_priv {
bool phy_reset:1;
- struct gpio_desc *gpiod_reset;
};
struct at803x_context {
@@ -250,22 +249,11 @@ static int at803x_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
struct at803x_priv *priv;
- struct gpio_desc *gpiod_reset;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
- if (phydev->drv->phy_id != ATH8030_PHY_ID)
- goto does_not_require_reset_workaround;
-
- gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
- if (IS_ERR(gpiod_reset))
- return PTR_ERR(gpiod_reset);
-
- priv->gpiod_reset = gpiod_reset;
-
-does_not_require_reset_workaround:
phydev->priv = priv;
return 0;
@@ -339,14 +327,14 @@ static void at803x_link_change_notify(struct phy_device *phydev)
* cannot recover from by software.
*/
if (phydev->state == PHY_NOLINK) {
- if (priv->gpiod_reset && !priv->phy_reset) {
+ if (phydev->mdio.reset && !priv->phy_reset) {
struct at803x_context context;
at803x_context_save(phydev, &context);
- gpiod_set_value(priv->gpiod_reset, 1);
+ phy_device_reset(phydev, 1);
msleep(1);
- gpiod_set_value(priv->gpiod_reset, 0);
+ phy_device_reset(phydev, 0);
msleep(1);
at803x_context_restore(phydev, &context);
@@ -404,8 +392,6 @@ static struct phy_driver at803x_driver[] = {
.resume = at803x_resume,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = at803x_ack_interrupt,
.config_intr = at803x_config_intr,
}, {
@@ -422,8 +408,6 @@ static struct phy_driver at803x_driver[] = {
.resume = at803x_resume,
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = at803x_ack_interrupt,
.config_intr = at803x_config_intr,
}, {
@@ -439,8 +423,6 @@ static struct phy_driver at803x_driver[] = {
.resume = at803x_resume,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.aneg_done = at803x_aneg_done,
.ack_interrupt = &at803x_ack_interrupt,
.config_intr = &at803x_config_intr,
diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c
index 3fe8cc5c177e..6838129839ca 100644
--- a/drivers/net/phy/bcm-cygnus.c
+++ b/drivers/net/phy/bcm-cygnus.c
@@ -136,8 +136,6 @@ static struct phy_driver bcm_cygnus_phy_driver[] = {
.name = "Broadcom Cygnus PHY",
.features = PHY_GBIT_FEATURES,
.config_init = bcm_cygnus_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
.suspend = genphy_suspend,
diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c
index b0492ef2cdaa..cf14613745c9 100644
--- a/drivers/net/phy/bcm63xx.c
+++ b/drivers/net/phy/bcm63xx.c
@@ -69,8 +69,6 @@ static struct phy_driver bcm63xx_driver[] = {
.features = (PHY_BASIC_FEATURES | SUPPORTED_Pause),
.flags = PHY_HAS_INTERRUPT | PHY_IS_INTERNAL,
.config_init = bcm63xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm63xx_config_intr,
}, {
@@ -81,8 +79,6 @@ static struct phy_driver bcm63xx_driver[] = {
.features = (PHY_BASIC_FEATURES | SUPPORTED_Pause),
.flags = PHY_HAS_INTERRUPT | PHY_IS_INTERNAL,
.config_init = bcm63xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm63xx_config_intr,
} };
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 8b33f688ac8a..421feb8f92fe 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -611,8 +611,6 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.features = PHY_GBIT_FEATURES, \
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_28nm_config_init, \
- .config_aneg = genphy_config_aneg, \
- .read_status = genphy_read_status, \
.resume = bcm7xxx_28nm_resume, \
.get_tunable = bcm7xxx_28nm_get_tunable, \
.set_tunable = bcm7xxx_28nm_set_tunable, \
@@ -630,8 +628,6 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.features = PHY_BASIC_FEATURES, \
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_28nm_ephy_config_init, \
- .config_aneg = genphy_config_aneg, \
- .read_status = genphy_read_status, \
.resume = bcm7xxx_28nm_ephy_resume, \
.get_sset_count = bcm_phy_get_sset_count, \
.get_strings = bcm_phy_get_strings, \
@@ -647,8 +643,6 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.features = PHY_BASIC_FEATURES, \
.flags = PHY_IS_INTERNAL, \
.config_init = bcm7xxx_config_init, \
- .config_aneg = genphy_config_aneg, \
- .read_status = genphy_read_status, \
.suspend = bcm7xxx_suspend, \
.resume = bcm7xxx_config_init, \
}
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index d7ed69deabfb..3bb6b66dc7bf 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -540,6 +540,37 @@ static int brcm_fet_config_intr(struct phy_device *phydev)
return err;
}
+struct bcm53xx_phy_priv {
+ u64 *stats;
+};
+
+static int bcm53xx_phy_probe(struct phy_device *phydev)
+{
+ struct bcm53xx_phy_priv *priv;
+
+ priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ priv->stats = devm_kcalloc(&phydev->mdio.dev,
+ bcm_phy_get_sset_count(phydev), sizeof(u64),
+ GFP_KERNEL);
+ if (!priv->stats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void bcm53xx_phy_get_stats(struct phy_device *phydev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct bcm53xx_phy_priv *priv = phydev->priv;
+
+ bcm_phy_get_stats(phydev, priv->stats, stats, data);
+}
+
static struct phy_driver broadcom_drivers[] = {
{
.phy_id = PHY_ID_BCM5411,
@@ -548,8 +579,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -559,8 +588,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -570,8 +597,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -581,8 +606,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -592,8 +615,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -603,8 +624,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -614,8 +633,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -626,7 +643,6 @@ static struct phy_driver broadcom_drivers[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
.config_aneg = bcm5481_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -637,7 +653,6 @@ static struct phy_driver broadcom_drivers[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
.config_aneg = bcm5481_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -647,7 +662,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm5482_config_init,
- .config_aneg = genphy_config_aneg,
.read_status = bcm5482_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
@@ -658,8 +672,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -669,8 +681,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -680,8 +690,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = bcm54xx_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = bcm_phy_ack_intr,
.config_intr = bcm_phy_config_intr,
}, {
@@ -691,8 +699,6 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = brcm_fet_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = brcm_fet_ack_interrupt,
.config_intr = brcm_fet_config_intr,
}, {
@@ -702,10 +708,18 @@ static struct phy_driver broadcom_drivers[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = brcm_fet_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = brcm_fet_ack_interrupt,
.config_intr = brcm_fet_config_intr,
+}, {
+ .phy_id = PHY_ID_BCM5395,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM5395",
+ .flags = PHY_IS_INTERNAL,
+ .features = PHY_GBIT_FEATURES,
+ .get_sset_count = bcm_phy_get_sset_count,
+ .get_strings = bcm_phy_get_strings,
+ .get_stats = bcm53xx_phy_get_stats,
+ .probe = bcm53xx_phy_probe,
} };
module_phy_driver(broadcom_drivers);
@@ -726,6 +740,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
{ PHY_ID_BCM57780, 0xfffffff0 },
{ PHY_ID_BCMAC131, 0xfffffff0 },
{ PHY_ID_BCM5241, 0xfffffff0 },
+ { PHY_ID_BCM5395, 0xfffffff0 },
{ }
};
diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c
index d339c1afea77..c05af00bf4b6 100644
--- a/drivers/net/phy/cicada.c
+++ b/drivers/net/phy/cicada.c
@@ -110,8 +110,6 @@ static struct phy_driver cis820x_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &cis820x_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &cis820x_ack_interrupt,
.config_intr = &cis820x_config_intr,
}, {
@@ -121,8 +119,6 @@ static struct phy_driver cis820x_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &cis820x_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &cis820x_ack_interrupt,
.config_intr = &cis820x_config_intr,
} };
diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c
index e28913d9ea7e..5ee99b3b428c 100644
--- a/drivers/net/phy/davicom.c
+++ b/drivers/net/phy/davicom.c
@@ -153,7 +153,6 @@ static struct phy_driver dm91xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = dm9161_config_init,
.config_aneg = dm9161_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = dm9161_ack_interrupt,
.config_intr = dm9161_config_intr,
}, {
@@ -164,7 +163,6 @@ static struct phy_driver dm91xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = dm9161_config_init,
.config_aneg = dm9161_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = dm9161_ack_interrupt,
.config_intr = dm9161_config_intr,
}, {
@@ -175,7 +173,6 @@ static struct phy_driver dm91xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = dm9161_config_init,
.config_aneg = dm9161_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = dm9161_ack_interrupt,
.config_intr = dm9161_config_intr,
}, {
@@ -184,8 +181,6 @@ static struct phy_driver dm91xx_driver[] = {
.phy_id_mask = 0x0ffffff0,
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = dm9161_ack_interrupt,
.config_intr = dm9161_config_intr,
} };
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index cbd629822f04..654f42d00092 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1502,8 +1502,6 @@ static struct phy_driver dp83640_driver = {
.probe = dp83640_probe,
.remove = dp83640_remove,
.config_init = dp83640_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = dp83640_ack_interrupt,
.config_intr = dp83640_config_intr,
.ts_info = dp83640_ts_info,
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index 14335d14e9e4..6e8a2a4f3a6e 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -325,8 +325,6 @@ static struct phy_driver dp83822_driver[] = {
.set_wol = dp83822_set_wol,
.ack_interrupt = dp83822_ack_interrupt,
.config_intr = dp83822_config_intr,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.suspend = dp83822_suspend,
.resume = dp83822_resume,
},
diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
index 3966d43c5146..cd09c3af2117 100644
--- a/drivers/net/phy/dp83848.c
+++ b/drivers/net/phy/dp83848.c
@@ -95,8 +95,6 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
.config_init = genphy_config_init, \
.suspend = genphy_suspend, \
.resume = genphy_resume, \
- .config_aneg = genphy_config_aneg, \
- .read_status = genphy_read_status, \
\
/* IRQ related */ \
.ack_interrupt = dp83848_ack_interrupt, \
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index c1ab976cc800..ab58224f897f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -324,8 +324,6 @@ static struct phy_driver dp83867_driver[] = {
.ack_interrupt = dp83867_ack_interrupt,
.config_intr = dp83867_config_intr,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
},
diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c
index 567280a72241..791587a49215 100644
--- a/drivers/net/phy/icplus.c
+++ b/drivers/net/phy/icplus.c
@@ -227,8 +227,6 @@ static struct phy_driver icplus_driver[] = {
.phy_id_mask = 0x0ffffff0,
.features = PHY_GBIT_FEATURES,
.config_init = &ip1001_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -239,8 +237,6 @@ static struct phy_driver icplus_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.ack_interrupt = ip101a_g_ack_interrupt,
.config_init = &ip101a_g_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
} };
diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index 55f8c52dd2f1..a11f80cb5388 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -243,7 +243,6 @@ static struct phy_driver xway_gphy[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -257,7 +256,6 @@ static struct phy_driver xway_gphy[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -271,7 +269,6 @@ static struct phy_driver xway_gphy[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -285,7 +282,6 @@ static struct phy_driver xway_gphy[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
.config_aneg = xway_gphy14_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -298,8 +294,6 @@ static struct phy_driver xway_gphy[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -312,8 +306,6 @@ static struct phy_driver xway_gphy[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -326,8 +318,6 @@ static struct phy_driver xway_gphy[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
@@ -340,8 +330,6 @@ static struct phy_driver xway_gphy[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = xway_gphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = xway_gphy_ack_interrupt,
.did_interrupt = xway_gphy_did_interrupt,
.config_intr = xway_gphy_config_intr,
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index 09d215177fff..c14b254b2879 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -259,8 +259,6 @@ static struct phy_driver lxt97x_driver[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = lxt970_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = lxt970_ack_interrupt,
.config_intr = lxt970_config_intr,
}, {
@@ -269,8 +267,6 @@ static struct phy_driver lxt97x_driver[] = {
.phy_id_mask = 0xfffffff0,
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = lxt971_ack_interrupt,
.config_intr = lxt971_config_intr,
}, {
@@ -290,7 +286,6 @@ static struct phy_driver lxt97x_driver[] = {
.flags = 0,
.probe = lxt973_probe,
.config_aneg = lxt973_config_aneg,
- .read_status = genphy_read_status,
} };
module_phy_driver(lxt97x_driver);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index b5a8f750e433..2fc026dc170a 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1962,7 +1962,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = &marvell_config_init,
.config_aneg = &m88e1101_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -1980,7 +1979,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = &m88e1111_config_init,
.config_aneg = &marvell_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -2016,7 +2014,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = &m88e1118_config_init,
.config_aneg = &m88e1118_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -2074,7 +2071,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = &m88e1145_config_init,
.config_aneg = &marvell_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -2092,7 +2088,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = &m88e1149_config_init,
.config_aneg = &m88e1118_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -2110,7 +2105,6 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
.config_init = &m88e1111_config_init,
.config_aneg = &marvell_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -2127,8 +2121,6 @@ static struct phy_driver marvell_drivers[] = {
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
.config_init = &m88e1116r_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,
.resume = &genphy_resume,
@@ -2204,7 +2196,6 @@ static struct phy_driver marvell_drivers[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
- .config_aneg = &genphy_config_aneg,
.config_init = &m88e3016_config_init,
.aneg_done = &marvell_aneg_done,
.read_status = &marvell_read_status,
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 21b3f36e023a..f0cfba4e758b 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -84,7 +84,6 @@ static int mv3310_config_init(struct phy_device *phydev)
/* Check that the PHY interface type is compatible */
if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
- phydev->interface != PHY_INTERFACE_MODE_XGMII &&
phydev->interface != PHY_INTERFACE_MODE_XAUI &&
phydev->interface != PHY_INTERFACE_MODE_RXAUI &&
phydev->interface != PHY_INTERFACE_MODE_10GKR)
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 54d00a1d2bef..a0f34c385cad 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -38,6 +38,7 @@
#include <linux/phy.h>
#include <linux/io.h>
#include <linux/uaccess.h>
+#include <linux/gpio/consumer.h>
#include <asm/irq.h>
@@ -48,9 +49,26 @@
int mdiobus_register_device(struct mdio_device *mdiodev)
{
+ struct gpio_desc *gpiod = NULL;
+
if (mdiodev->bus->mdio_map[mdiodev->addr])
return -EBUSY;
+ /* Deassert the optional reset signal */
+ if (mdiodev->dev.of_node)
+ gpiod = fwnode_get_named_gpiod(&mdiodev->dev.of_node->fwnode,
+ "reset-gpios", 0, GPIOD_OUT_LOW,
+ "PHY reset");
+ if (PTR_ERR(gpiod) == -ENOENT)
+ gpiod = NULL;
+ else if (IS_ERR(gpiod))
+ return PTR_ERR(gpiod);
+
+ mdiodev->reset = gpiod;
+
+ /* Assert the reset signal again */
+ mdio_device_reset(mdiodev, 1);
+
mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev;
return 0;
@@ -421,6 +439,9 @@ void mdiobus_unregister(struct mii_bus *bus)
if (!mdiodev)
continue;
+ if (mdiodev->reset)
+ gpiod_put(mdiodev->reset);
+
mdiodev->device_remove(mdiodev);
mdiodev->device_free(mdiodev);
}
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index e24f28924af8..843c1dde93e4 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -12,6 +12,8 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/errno.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
@@ -22,6 +24,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/unistd.h>
+#include <linux/delay.h>
void mdio_device_free(struct mdio_device *mdiodev)
{
@@ -114,6 +117,21 @@ void mdio_device_remove(struct mdio_device *mdiodev)
}
EXPORT_SYMBOL(mdio_device_remove);
+void mdio_device_reset(struct mdio_device *mdiodev, int value)
+{
+ unsigned int d;
+
+ if (!mdiodev->reset)
+ return;
+
+ gpiod_set_value(mdiodev->reset, value);
+
+ d = value ? mdiodev->reset_delay : mdiodev->reset_post_delay;
+ if (d)
+ usleep_range(d, d + max_t(unsigned int, d / 10, 100));
+}
+EXPORT_SYMBOL(mdio_device_reset);
+
/**
* mdio_probe - probe an MDIO device
* @dev: device to probe
@@ -128,8 +146,16 @@ static int mdio_probe(struct device *dev)
struct mdio_driver *mdiodrv = to_mdio_driver(drv);
int err = 0;
- if (mdiodrv->probe)
+ if (mdiodrv->probe) {
+ /* Deassert the reset signal */
+ mdio_device_reset(mdiodev, 0);
+
err = mdiodrv->probe(mdiodev);
+ if (err) {
+ /* Assert the reset signal */
+ mdio_device_reset(mdiodev, 1);
+ }
+ }
return err;
}
@@ -140,9 +166,13 @@ static int mdio_remove(struct device *dev)
struct device_driver *drv = mdiodev->dev.driver;
struct mdio_driver *mdiodrv = to_mdio_driver(drv);
- if (mdiodrv->remove)
+ if (mdiodrv->remove) {
mdiodrv->remove(mdiodev);
+ /* Assert the reset signal */
+ mdio_device_reset(mdiodev, 1);
+ }
+
return 0;
}
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index 842eb871a6e3..4ee630afe43a 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -130,7 +130,6 @@ static struct phy_driver meson_gxl_phy[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_IS_INTERNAL,
.config_init = meson_gxl_config_init,
- .config_aneg = genphy_config_aneg,
.aneg_done = genphy_aneg_done,
.read_status = meson_gxl_read_status,
.suspend = genphy_suspend,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index ab4614113403..fd500b18e77f 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -801,8 +801,6 @@ static struct phy_driver ksphy_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.driver_data = &ks8737_type,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.suspend = genphy_suspend,
@@ -816,8 +814,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz8021_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -834,8 +830,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz8021_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -853,7 +847,6 @@ static struct phy_driver ksphy_driver[] = {
.probe = kszphy_probe,
.config_init = ksz8041_config_init,
.config_aneg = ksz8041_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -870,8 +863,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz8041_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -888,8 +879,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz8051_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -906,8 +895,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz8041_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -924,8 +911,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz8081_type,
.probe = kszphy_probe,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -940,8 +925,6 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.suspend = genphy_suspend,
@@ -955,8 +938,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9021_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
.get_sset_count = kszphy_get_sset_count,
@@ -975,7 +956,6 @@ static struct phy_driver ksphy_driver[] = {
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9031_config_init,
- .config_aneg = genphy_config_aneg,
.read_status = ksz9031_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
@@ -1000,8 +980,6 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -1021,8 +999,6 @@ static struct phy_driver ksphy_driver[] = {
.name = "Microchip KSZ9477",
.features = PHY_GBIT_FEATURES,
.config_init = kszphy_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
} };
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index 37ee856c7680..0f293ef28935 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -153,7 +153,6 @@ static struct phy_driver microchip_phy_driver[] = {
.config_init = genphy_config_init,
.config_aneg = lan88xx_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = lan88xx_phy_ack_interrupt,
.config_intr = lan88xx_phy_config_intr,
diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c
index 2addf1d3f619..2b1e336961f9 100644
--- a/drivers/net/phy/national.c
+++ b/drivers/net/phy/national.c
@@ -136,8 +136,6 @@ static struct phy_driver dp83865_driver[] = { {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = ns_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = ns_ack_interrupt,
.config_intr = ns_config_intr,
} };
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index ed10d1fc8f59..0c165ad1d788 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -493,7 +493,10 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
/* Invalidate LP advertising flags */
phydev->lp_advertising = 0;
- err = phydev->drv->config_aneg(phydev);
+ if (phydev->drv->config_aneg)
+ err = phydev->drv->config_aneg(phydev);
+ else
+ err = genphy_config_aneg(phydev);
if (err < 0)
goto out_unlock;
@@ -629,9 +632,6 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
if (PHY_HALTED == phydev->state)
return IRQ_NONE; /* It can't be ours. */
- disable_irq_nosync(irq);
- atomic_inc(&phydev->irq_disable);
-
phy_change(phydev);
return IRQ_HANDLED;
@@ -689,7 +689,6 @@ phy_err:
*/
int phy_start_interrupts(struct phy_device *phydev)
{
- atomic_set(&phydev->irq_disable, 0);
if (request_threaded_irq(phydev->irq, NULL, phy_interrupt,
IRQF_ONESHOT | IRQF_SHARED,
phydev_name(phydev), phydev) < 0) {
@@ -716,13 +715,6 @@ int phy_stop_interrupts(struct phy_device *phydev)
free_irq(phydev->irq, phydev);
- /* If work indeed has been cancelled, disable_irq() will have
- * been left unbalanced from phy_interrupt() and enable_irq()
- * has to be called so that other devices on the line work.
- */
- while (atomic_dec_return(&phydev->irq_disable) >= 0)
- enable_irq(phydev->irq);
-
return err;
}
EXPORT_SYMBOL(phy_stop_interrupts);
@@ -736,10 +728,11 @@ void phy_change(struct phy_device *phydev)
if (phy_interrupt_is_valid(phydev)) {
if (phydev->drv->did_interrupt &&
!phydev->drv->did_interrupt(phydev))
- goto ignore;
+ return;
- if (phy_disable_interrupts(phydev))
- goto phy_err;
+ if (phydev->state == PHY_HALTED)
+ if (phy_disable_interrupts(phydev))
+ goto phy_err;
}
mutex_lock(&phydev->lock);
@@ -747,28 +740,13 @@ void phy_change(struct phy_device *phydev)
phydev->state = PHY_CHANGELINK;
mutex_unlock(&phydev->lock);
- if (phy_interrupt_is_valid(phydev)) {
- atomic_dec(&phydev->irq_disable);
- enable_irq(phydev->irq);
-
- /* Reenable interrupts */
- if (PHY_HALTED != phydev->state &&
- phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED))
- goto irq_enable_err;
- }
-
/* reschedule state queue work to run as soon as possible */
phy_trigger_machine(phydev, true);
- return;
-ignore:
- atomic_dec(&phydev->irq_disable);
- enable_irq(phydev->irq);
+ if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
+ goto phy_err;
return;
-irq_enable_err:
- disable_irq(phydev->irq);
- atomic_inc(&phydev->irq_disable);
phy_err:
phy_error(phydev);
}
@@ -1003,10 +981,6 @@ void phy_state_machine(struct work_struct *work)
phydev->state = PHY_NOLINK;
phy_link_down(phydev, true);
}
-
- if (phy_interrupt_is_valid(phydev))
- err = phy_config_interrupt(phydev,
- PHY_INTERRUPT_ENABLED);
break;
case PHY_HALTED:
if (phydev->link) {
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b15b31ca2618..be13b5d6a8bf 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -634,6 +634,9 @@ int phy_device_register(struct phy_device *phydev)
if (err)
return err;
+ /* Deassert the reset signal */
+ phy_device_reset(phydev, 0);
+
/* Run all of the fixups for this PHY */
err = phy_scan_fixups(phydev);
if (err) {
@@ -652,6 +655,9 @@ int phy_device_register(struct phy_device *phydev)
return 0;
out:
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
+
mdiobus_unregister_device(&phydev->mdio);
return err;
}
@@ -668,6 +674,10 @@ EXPORT_SYMBOL(phy_device_register);
void phy_device_remove(struct phy_device *phydev)
{
device_del(&phydev->mdio.dev);
+
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
+
mdiobus_unregister_device(&phydev->mdio);
}
EXPORT_SYMBOL(phy_device_remove);
@@ -851,6 +861,9 @@ int phy_init_hw(struct phy_device *phydev)
{
int ret = 0;
+ /* Deassert the reset signal */
+ phy_device_reset(phydev, 0);
+
if (!phydev->drv || !phydev->drv->config_init)
return 0;
@@ -1130,6 +1143,9 @@ void phy_detach(struct phy_device *phydev)
put_device(&phydev->mdio.dev);
if (ndev_owner != bus->owner)
module_put(bus->owner);
+
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
}
EXPORT_SYMBOL(phy_detach);
@@ -1208,6 +1224,30 @@ out:
}
EXPORT_SYMBOL(phy_loopback);
+/**
+ * phy_reset_after_clk_enable - perform a PHY reset if needed
+ * @phydev: target phy_device struct
+ *
+ * Description: Some PHYs are known to need a reset after their refclk was
+ * enabled. This function evaluates the flags and perform the reset if it's
+ * needed. Returns < 0 on error, 0 if the phy wasn't reset and 1 if the phy
+ * was reset.
+ */
+int phy_reset_after_clk_enable(struct phy_device *phydev)
+{
+ if (!phydev || !phydev->drv)
+ return -ENODEV;
+
+ if (phydev->drv->flags & PHY_RST_AFTER_CLK_EN) {
+ phy_device_reset(phydev, 1);
+ phy_device_reset(phydev, 0);
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(phy_reset_after_clk_enable);
+
/* Generic PHY support and helper functions */
/**
@@ -1813,8 +1853,16 @@ static int phy_probe(struct device *dev)
/* Set the state to READY by default */
phydev->state = PHY_READY;
- if (phydev->drv->probe)
+ if (phydev->drv->probe) {
+ /* Deassert the reset signal */
+ phy_device_reset(phydev, 0);
+
err = phydev->drv->probe(phydev);
+ if (err) {
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
+ }
+ }
mutex_unlock(&phydev->lock);
@@ -1831,8 +1879,12 @@ static int phy_remove(struct device *dev)
phydev->state = PHY_DOWN;
mutex_unlock(&phydev->lock);
- if (phydev->drv && phydev->drv->remove)
+ if (phydev->drv && phydev->drv->remove) {
phydev->drv->remove(phydev);
+
+ /* Assert the reset signal */
+ phy_device_reset(phydev, 1);
+ }
phydev->drv = NULL;
return 0;
@@ -1909,9 +1961,7 @@ static struct phy_driver genphy_driver = {
.features = PHY_GBIT_FEATURES | SUPPORTED_MII |
SUPPORTED_AUI | SUPPORTED_FIBRE |
SUPPORTED_BNC,
- .config_aneg = genphy_config_aneg,
.aneg_done = genphy_aneg_done,
- .read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
.set_loopback = genphy_loopback,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 5dc9668dde34..69adc0aa141c 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -36,7 +36,11 @@ enum {
PHYLINK_DISABLE_LINK,
};
+/**
+ * struct phylink - internal data type for phylink
+ */
struct phylink {
+ /* private: */
struct net_device *netdev;
const struct phylink_mac_ops *ops;
@@ -50,6 +54,8 @@ struct phylink {
/* The link configuration settings */
struct phylink_link_state link_config;
struct gpio_desc *link_gpio;
+ void (*get_fixed_state)(struct net_device *dev,
+ struct phylink_link_state *s);
struct mutex state_mutex;
struct phylink_link_state phy_state;
@@ -87,6 +93,13 @@ static inline bool linkmode_empty(const unsigned long *src)
return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS);
}
+/**
+ * phylink_set_port_modes() - set the port type modes in the ethtool mask
+ * @mask: ethtool link mode mask
+ *
+ * Sets all the port type modes in the ethtool mask. MAC drivers should
+ * use this in their 'validate' callback.
+ */
void phylink_set_port_modes(unsigned long *mask)
{
phylink_set(mask, TP);
@@ -117,8 +130,7 @@ static const char *phylink_an_mode_str(unsigned int mode)
static const char *modestr[] = {
[MLO_AN_PHY] = "phy",
[MLO_AN_FIXED] = "fixed",
- [MLO_AN_SGMII] = "SGMII",
- [MLO_AN_8023Z] = "802.3z",
+ [MLO_AN_INBAND] = "inband",
};
return mode < ARRAY_SIZE(modestr) ? modestr[mode] : "unknown";
@@ -132,59 +144,64 @@ static int phylink_validate(struct phylink *pl, unsigned long *supported,
return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
}
-static int phylink_parse_fixedlink(struct phylink *pl, struct device_node *np)
+static int phylink_parse_fixedlink(struct phylink *pl,
+ struct fwnode_handle *fwnode)
{
- struct device_node *fixed_node;
+ struct fwnode_handle *fixed_node;
const struct phy_setting *s;
struct gpio_desc *desc;
- const __be32 *fixed_prop;
u32 speed;
- int ret, len;
+ int ret;
- fixed_node = of_get_child_by_name(np, "fixed-link");
+ fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link");
if (fixed_node) {
- ret = of_property_read_u32(fixed_node, "speed", &speed);
+ ret = fwnode_property_read_u32(fixed_node, "speed", &speed);
pl->link_config.speed = speed;
pl->link_config.duplex = DUPLEX_HALF;
- if (of_property_read_bool(fixed_node, "full-duplex"))
+ if (fwnode_property_read_bool(fixed_node, "full-duplex"))
pl->link_config.duplex = DUPLEX_FULL;
/* We treat the "pause" and "asym-pause" terminology as
* defining the link partner's ability. */
- if (of_property_read_bool(fixed_node, "pause"))
+ if (fwnode_property_read_bool(fixed_node, "pause"))
pl->link_config.pause |= MLO_PAUSE_SYM;
- if (of_property_read_bool(fixed_node, "asym-pause"))
+ if (fwnode_property_read_bool(fixed_node, "asym-pause"))
pl->link_config.pause |= MLO_PAUSE_ASYM;
if (ret == 0) {
- desc = fwnode_get_named_gpiod(&fixed_node->fwnode,
- "link-gpios", 0,
- GPIOD_IN, "?");
+ desc = fwnode_get_named_gpiod(fixed_node, "link-gpios",
+ 0, GPIOD_IN, "?");
if (!IS_ERR(desc))
pl->link_gpio = desc;
else if (desc == ERR_PTR(-EPROBE_DEFER))
ret = -EPROBE_DEFER;
}
- of_node_put(fixed_node);
+ fwnode_handle_put(fixed_node);
if (ret)
return ret;
} else {
- fixed_prop = of_get_property(np, "fixed-link", &len);
- if (!fixed_prop) {
+ u32 prop[5];
+
+ ret = fwnode_property_read_u32_array(fwnode, "fixed-link",
+ NULL, 0);
+ if (ret != ARRAY_SIZE(prop)) {
netdev_err(pl->netdev, "broken fixed-link?\n");
return -EINVAL;
}
- if (len == 5 * sizeof(*fixed_prop)) {
- pl->link_config.duplex = be32_to_cpu(fixed_prop[1]) ?
+
+ ret = fwnode_property_read_u32_array(fwnode, "fixed-link",
+ prop, ARRAY_SIZE(prop));
+ if (!ret) {
+ pl->link_config.duplex = prop[1] ?
DUPLEX_FULL : DUPLEX_HALF;
- pl->link_config.speed = be32_to_cpu(fixed_prop[2]);
- if (be32_to_cpu(fixed_prop[3]))
+ pl->link_config.speed = prop[2];
+ if (prop[3])
pl->link_config.pause |= MLO_PAUSE_SYM;
- if (be32_to_cpu(fixed_prop[4]))
+ if (prop[4])
pl->link_config.pause |= MLO_PAUSE_ASYM;
}
}
@@ -220,17 +237,17 @@ static int phylink_parse_fixedlink(struct phylink *pl, struct device_node *np)
return 0;
}
-static int phylink_parse_mode(struct phylink *pl, struct device_node *np)
+static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
{
- struct device_node *dn;
+ struct fwnode_handle *dn;
const char *managed;
- dn = of_get_child_by_name(np, "fixed-link");
- if (dn || of_find_property(np, "fixed-link", NULL))
+ dn = fwnode_get_named_child_node(fwnode, "fixed-link");
+ if (dn || fwnode_property_present(fwnode, "fixed-link"))
pl->link_an_mode = MLO_AN_FIXED;
- of_node_put(dn);
+ fwnode_handle_put(dn);
- if (of_property_read_string(np, "managed", &managed) == 0 &&
+ if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
strcmp(managed, "in-band-status") == 0) {
if (pl->link_an_mode == MLO_AN_FIXED) {
netdev_err(pl->netdev,
@@ -244,6 +261,7 @@ static int phylink_parse_mode(struct phylink *pl, struct device_node *np)
phylink_set(pl->supported, Asym_Pause);
phylink_set(pl->supported, Pause);
pl->link_config.an_enabled = true;
+ pl->link_an_mode = MLO_AN_INBAND;
switch (pl->link_config.interface) {
case PHY_INTERFACE_MODE_SGMII:
@@ -253,17 +271,14 @@ static int phylink_parse_mode(struct phylink *pl, struct device_node *np)
phylink_set(pl->supported, 100baseT_Full);
phylink_set(pl->supported, 1000baseT_Half);
phylink_set(pl->supported, 1000baseT_Full);
- pl->link_an_mode = MLO_AN_SGMII;
break;
case PHY_INTERFACE_MODE_1000BASEX:
phylink_set(pl->supported, 1000baseX_Full);
- pl->link_an_mode = MLO_AN_8023Z;
break;
case PHY_INTERFACE_MODE_2500BASEX:
phylink_set(pl->supported, 2500baseX_Full);
- pl->link_an_mode = MLO_AN_8023Z;
break;
case PHY_INTERFACE_MODE_10GKR:
@@ -280,7 +295,6 @@ static int phylink_parse_mode(struct phylink *pl, struct device_node *np)
phylink_set(pl->supported, 10000baseLR_Full);
phylink_set(pl->supported, 10000baseLRM_Full);
phylink_set(pl->supported, 10000baseER_Full);
- pl->link_an_mode = MLO_AN_SGMII;
break;
default:
@@ -320,8 +334,7 @@ static void phylink_mac_config(struct phylink *pl,
static void phylink_mac_an_restart(struct phylink *pl)
{
if (pl->link_config.an_enabled &&
- (pl->link_config.interface == PHY_INTERFACE_MODE_1000BASEX ||
- pl->link_config.interface == PHY_INTERFACE_MODE_2500BASEX))
+ phy_interface_mode_is_8023z(pl->link_config.interface))
pl->ops->mac_an_restart(pl->netdev);
}
@@ -339,12 +352,14 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *
}
/* The fixed state is... fixed except for the link state,
- * which may be determined by a GPIO.
+ * which may be determined by a GPIO or a callback.
*/
static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_state *state)
{
*state = pl->link_config;
- if (pl->link_gpio)
+ if (pl->get_fixed_state)
+ pl->get_fixed_state(pl->netdev, state);
+ else if (pl->link_gpio)
state->link = !!gpiod_get_value(pl->link_gpio);
}
@@ -423,7 +438,7 @@ static void phylink_resolve(struct work_struct *w)
phylink_mac_config(pl, &link_state);
break;
- case MLO_AN_SGMII:
+ case MLO_AN_INBAND:
phylink_get_mac_state(pl, &link_state);
if (pl->phydev) {
bool changed = false;
@@ -449,10 +464,6 @@ static void phylink_resolve(struct work_struct *w)
}
}
break;
-
- case MLO_AN_8023Z:
- phylink_get_mac_state(pl, &link_state);
- break;
}
}
@@ -489,15 +500,27 @@ static void phylink_run_resolve(struct phylink *pl)
static const struct sfp_upstream_ops sfp_phylink_ops;
-static int phylink_register_sfp(struct phylink *pl, struct device_node *np)
+static int phylink_register_sfp(struct phylink *pl,
+ struct fwnode_handle *fwnode)
{
- struct device_node *sfp_np;
+ struct fwnode_reference_args ref;
+ int ret;
- sfp_np = of_parse_phandle(np, "sfp", 0);
- if (!sfp_np)
+ if (!fwnode)
return 0;
- pl->sfp_bus = sfp_register_upstream(sfp_np, pl->netdev, pl,
+ ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL,
+ 0, 0, &ref);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ return 0;
+
+ netdev_err(pl->netdev, "unable to parse \"sfp\" node: %d\n",
+ ret);
+ return ret;
+ }
+
+ pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl->netdev, pl,
&sfp_phylink_ops);
if (!pl->sfp_bus)
return -ENOMEM;
@@ -505,7 +528,22 @@ static int phylink_register_sfp(struct phylink *pl, struct device_node *np)
return 0;
}
-struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
+/**
+ * phylink_create() - create a phylink instance
+ * @ndev: a pointer to the &struct net_device
+ * @fwnode: a pointer to a &struct fwnode_handle describing the network
+ * interface
+ * @iface: the desired link mode defined by &typedef phy_interface_t
+ * @ops: a pointer to a &struct phylink_mac_ops for the MAC.
+ *
+ * Create a new phylink instance, and parse the link parameters found in @np.
+ * This will parse in-band modes, fixed-link or SFP configuration.
+ *
+ * Returns a pointer to a &struct phylink, or an error-pointer value. Users
+ * must use IS_ERR() to check for errors from this function.
+ */
+struct phylink *phylink_create(struct net_device *ndev,
+ struct fwnode_handle *fwnode,
phy_interface_t iface,
const struct phylink_mac_ops *ops)
{
@@ -521,7 +559,10 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
pl->netdev = ndev;
pl->phy_state.interface = iface;
pl->link_interface = iface;
- pl->link_port = PORT_MII;
+ if (iface == PHY_INTERFACE_MODE_MOCA)
+ pl->link_port = PORT_BNC;
+ else
+ pl->link_port = PORT_MII;
pl->link_config.interface = iface;
pl->link_config.pause = MLO_PAUSE_AN;
pl->link_config.speed = SPEED_UNKNOWN;
@@ -533,21 +574,21 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
linkmode_copy(pl->link_config.advertising, pl->supported);
phylink_validate(pl, pl->supported, &pl->link_config);
- ret = phylink_parse_mode(pl, np);
+ ret = phylink_parse_mode(pl, fwnode);
if (ret < 0) {
kfree(pl);
return ERR_PTR(ret);
}
if (pl->link_an_mode == MLO_AN_FIXED) {
- ret = phylink_parse_fixedlink(pl, np);
+ ret = phylink_parse_fixedlink(pl, fwnode);
if (ret < 0) {
kfree(pl);
return ERR_PTR(ret);
}
}
- ret = phylink_register_sfp(pl, np);
+ ret = phylink_register_sfp(pl, fwnode);
if (ret < 0) {
kfree(pl);
return ERR_PTR(ret);
@@ -557,6 +598,13 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
}
EXPORT_SYMBOL_GPL(phylink_create);
+/**
+ * phylink_destroy() - cleanup and destroy the phylink instance
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * Destroy a phylink instance. Any PHY that has been attached must have been
+ * cleaned up via phylink_disconnect_phy() prior to calling this function.
+ */
void phylink_destroy(struct phylink *pl)
{
if (pl->sfp_bus)
@@ -653,10 +701,36 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
return 0;
}
+/**
+ * phylink_connect_phy() - connect a PHY to the phylink instance
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @phy: a pointer to a &struct phy_device.
+ *
+ * Connect @phy to the phylink instance specified by @pl by calling
+ * phy_attach_direct(). Configure the @phy according to the MAC driver's
+ * capabilities, start the PHYLIB state machine and enable any interrupts
+ * that the PHY supports.
+ *
+ * This updates the phylink's ethtool supported and advertising link mode
+ * masks.
+ *
+ * Returns 0 on success or a negative errno.
+ */
int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
{
int ret;
+ if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
+ (pl->link_an_mode == MLO_AN_INBAND &&
+ phy_interface_mode_is_8023z(pl->link_interface))))
+ return -EINVAL;
+
+ /* Use PHY device/driver interface */
+ if (pl->link_interface == PHY_INTERFACE_MODE_NA) {
+ pl->link_interface = phy->interface;
+ pl->link_config.interface = pl->link_interface;
+ }
+
ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);
if (ret)
return ret;
@@ -669,14 +743,29 @@ int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
}
EXPORT_SYMBOL_GPL(phylink_connect_phy);
-int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn)
+/**
+ * phylink_of_phy_connect() - connect the PHY specified in the DT mode.
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @dn: a pointer to a &struct device_node.
+ * @flags: PHY-specific flags to communicate to the PHY device driver
+ *
+ * Connect the phy specified in the device node @dn to the phylink instance
+ * specified by @pl. Actions specified in phylink_connect_phy() will be
+ * performed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn,
+ u32 flags)
{
struct device_node *phy_node;
struct phy_device *phy_dev;
int ret;
- /* Fixed links are handled without needing a PHY */
- if (pl->link_an_mode == MLO_AN_FIXED)
+ /* Fixed links and 802.3z are handled without needing a PHY */
+ if (pl->link_an_mode == MLO_AN_FIXED ||
+ (pl->link_an_mode == MLO_AN_INBAND &&
+ phy_interface_mode_is_8023z(pl->link_interface)))
return 0;
phy_node = of_parse_phandle(dn, "phy-handle", 0);
@@ -686,14 +775,13 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn)
phy_node = of_parse_phandle(dn, "phy-device", 0);
if (!phy_node) {
- if (pl->link_an_mode == MLO_AN_PHY) {
- netdev_err(pl->netdev, "unable to find PHY node\n");
+ if (pl->link_an_mode == MLO_AN_PHY)
return -ENODEV;
- }
return 0;
}
- phy_dev = of_phy_attach(pl->netdev, phy_node, 0, pl->link_interface);
+ phy_dev = of_phy_attach(pl->netdev, phy_node, flags,
+ pl->link_interface);
/* We're done with the phy_node handle */
of_node_put(phy_node);
@@ -708,6 +796,13 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn)
}
EXPORT_SYMBOL_GPL(phylink_of_phy_connect);
+/**
+ * phylink_disconnect_phy() - disconnect any PHY attached to the phylink
+ * instance.
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * Disconnect any current PHY from the phylink instance described by @pl.
+ */
void phylink_disconnect_phy(struct phylink *pl)
{
struct phy_device *phy;
@@ -729,6 +824,40 @@ void phylink_disconnect_phy(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
+/**
+ * phylink_fixed_state_cb() - allow setting a fixed link callback
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @cb: callback to execute to determine the fixed link state.
+ *
+ * The MAC driver should call this driver when the state of its link
+ * can be determined through e.g: an out of band MMIO register.
+ */
+int phylink_fixed_state_cb(struct phylink *pl,
+ void (*cb)(struct net_device *dev,
+ struct phylink_link_state *state))
+{
+ /* It does not make sense to let the link be overriden unless we use
+ * MLO_AN_FIXED
+ */
+ if (pl->link_an_mode != MLO_AN_FIXED)
+ return -EINVAL;
+
+ mutex_lock(&pl->state_mutex);
+ pl->get_fixed_state = cb;
+ mutex_unlock(&pl->state_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(phylink_fixed_state_cb);
+
+/**
+ * phylink_mac_change() - notify phylink of a change in MAC state
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @up: indicates whether the link is currently up.
+ *
+ * The MAC driver should call this driver when the state of its link
+ * changes (eg, link failure, new negotiation results, etc.)
+ */
void phylink_mac_change(struct phylink *pl, bool up)
{
if (!up)
@@ -738,6 +867,14 @@ void phylink_mac_change(struct phylink *pl, bool up)
}
EXPORT_SYMBOL_GPL(phylink_mac_change);
+/**
+ * phylink_start() - start a phylink instance
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * Start the phylink instance specified by @pl, configuring the MAC for the
+ * desired link mode(s) and negotiation style. This should be called from the
+ * network device driver's &struct net_device_ops ndo_open() method.
+ */
void phylink_start(struct phylink *pl)
{
WARN_ON(!lockdep_rtnl_is_held());
@@ -753,6 +890,12 @@ void phylink_start(struct phylink *pl)
phylink_resolve_flow(pl, &pl->link_config);
phylink_mac_config(pl, &pl->link_config);
+ /* Restart autonegotiation if using 802.3z to ensure that the link
+ * parameters are properly negotiated. This is necessary for DSA
+ * switches using 802.3z negotiation to ensure they see our modes.
+ */
+ phylink_mac_an_restart(pl);
+
clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
phylink_run_resolve(pl);
@@ -763,6 +906,15 @@ void phylink_start(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_start);
+/**
+ * phylink_stop() - stop a phylink instance
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * Stop the phylink instance specified by @pl. This should be called from the
+ * network device driver's &struct net_device_ops ndo_stop() method. The
+ * network device's carrier state should not be changed prior to calling this
+ * function.
+ */
void phylink_stop(struct phylink *pl)
{
WARN_ON(!lockdep_rtnl_is_held());
@@ -778,6 +930,15 @@ void phylink_stop(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_stop);
+/**
+ * phylink_ethtool_get_wol() - get the wake on lan parameters for the PHY
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @wol: a pointer to &struct ethtool_wolinfo to hold the read parameters
+ *
+ * Read the wake on lan parameters from the PHY attached to the phylink
+ * instance specified by @pl. If no PHY is currently attached, report no
+ * support for wake on lan.
+ */
void phylink_ethtool_get_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
{
WARN_ON(!lockdep_rtnl_is_held());
@@ -790,6 +951,17 @@ void phylink_ethtool_get_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
}
EXPORT_SYMBOL_GPL(phylink_ethtool_get_wol);
+/**
+ * phylink_ethtool_set_wol() - set wake on lan parameters
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @wol: a pointer to &struct ethtool_wolinfo for the desired parameters
+ *
+ * Set the wake on lan parameters for the PHY attached to the phylink
+ * instance specified by @pl. If no PHY is attached, returns %EOPNOTSUPP
+ * error.
+ *
+ * Returns zero on success or negative errno code.
+ */
int phylink_ethtool_set_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
{
int ret = -EOPNOTSUPP;
@@ -825,6 +997,15 @@ static void phylink_get_ksettings(const struct phylink_link_state *state,
AUTONEG_DISABLE;
}
+/**
+ * phylink_ethtool_ksettings_get() - get the current link settings
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @kset: a pointer to a &struct ethtool_link_ksettings to hold link settings
+ *
+ * Read the current link settings for the phylink instance specified by @pl.
+ * This will be the link settings read from the MAC, PHY or fixed link
+ * settings depending on the current negotiation mode.
+ */
int phylink_ethtool_ksettings_get(struct phylink *pl,
struct ethtool_link_ksettings *kset)
{
@@ -850,14 +1031,13 @@ int phylink_ethtool_ksettings_get(struct phylink *pl,
phylink_get_ksettings(&link_state, kset);
break;
- case MLO_AN_SGMII:
+ case MLO_AN_INBAND:
/* If there is a phy attached, then use the reported
* settings from the phy with no modification.
*/
if (pl->phydev)
break;
- case MLO_AN_8023Z:
phylink_get_mac_state(pl, &link_state);
/* The MAC is reporting the link results from its own PCS
@@ -872,6 +1052,11 @@ int phylink_ethtool_ksettings_get(struct phylink *pl,
}
EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get);
+/**
+ * phylink_ethtool_ksettings_set() - set the link settings
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @kset: a pointer to a &struct ethtool_link_ksettings for the desired modes
+ */
int phylink_ethtool_ksettings_set(struct phylink *pl,
const struct ethtool_link_ksettings *kset)
{
@@ -965,6 +1150,17 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
}
EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_set);
+/**
+ * phylink_ethtool_nway_reset() - restart negotiation
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * Restart negotiation for the phylink instance specified by @pl. This will
+ * cause any attached phy to restart negotiation with the link partner, and
+ * if the MAC is in a BaseX mode, the MAC will also be requested to restart
+ * negotiation.
+ *
+ * Returns zero on success, or negative error code.
+ */
int phylink_ethtool_nway_reset(struct phylink *pl)
{
int ret = 0;
@@ -979,6 +1175,11 @@ int phylink_ethtool_nway_reset(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_ethtool_nway_reset);
+/**
+ * phylink_ethtool_get_pauseparam() - get the current pause parameters
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @pause: a pointer to a &struct ethtool_pauseparam
+ */
void phylink_ethtool_get_pauseparam(struct phylink *pl,
struct ethtool_pauseparam *pause)
{
@@ -990,6 +1191,11 @@ void phylink_ethtool_get_pauseparam(struct phylink *pl,
}
EXPORT_SYMBOL_GPL(phylink_ethtool_get_pauseparam);
+/**
+ * phylink_ethtool_set_pauseparam() - set the current pause parameters
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @pause: a pointer to a &struct ethtool_pauseparam
+ */
int phylink_ethtool_set_pauseparam(struct phylink *pl,
struct ethtool_pauseparam *pause)
{
@@ -1028,8 +1234,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
phylink_mac_config(pl, config);
break;
- case MLO_AN_SGMII:
- case MLO_AN_8023Z:
+ case MLO_AN_INBAND:
phylink_mac_config(pl, config);
phylink_mac_an_restart(pl);
break;
@@ -1068,19 +1273,16 @@ int phylink_ethtool_get_module_eeprom(struct phylink *pl,
}
EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_eeprom);
-int phylink_init_eee(struct phylink *pl, bool clk_stop_enable)
-{
- int ret = -EPROTONOSUPPORT;
-
- WARN_ON(!lockdep_rtnl_is_held());
-
- if (pl->phydev)
- ret = phy_init_eee(pl->phydev, clk_stop_enable);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(phylink_init_eee);
-
+/**
+ * phylink_ethtool_get_eee_err() - read the energy efficient ethernet error
+ * counter
+ * @pl: a pointer to a &struct phylink returned from phylink_create().
+ *
+ * Read the Energy Efficient Ethernet error counter from the PHY associated
+ * with the phylink instance specified by @pl.
+ *
+ * Returns positive error counter value, or negative error code.
+ */
int phylink_get_eee_err(struct phylink *pl)
{
int ret = 0;
@@ -1094,6 +1296,11 @@ int phylink_get_eee_err(struct phylink *pl)
}
EXPORT_SYMBOL_GPL(phylink_get_eee_err);
+/**
+ * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @eee: a pointer to a &struct ethtool_eee for the read parameters
+ */
int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_eee *eee)
{
int ret = -EOPNOTSUPP;
@@ -1107,6 +1314,11 @@ int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_eee *eee)
}
EXPORT_SYMBOL_GPL(phylink_ethtool_get_eee);
+/**
+ * phylink_ethtool_set_eee() - set the energy efficient ethernet parameters
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @eee: a pointer to a &struct ethtool_eee for the desired parameters
+ */
int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_eee *eee)
{
int ret = -EOPNOTSUPP;
@@ -1246,9 +1458,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
case MLO_AN_PHY:
return -EOPNOTSUPP;
- case MLO_AN_SGMII:
- /* No phy, fall through to 8023z method */
- case MLO_AN_8023Z:
+ case MLO_AN_INBAND:
if (phy_id == 0) {
val = phylink_get_mac_state(pl, &state);
if (val < 0)
@@ -1273,15 +1483,31 @@ static int phylink_mii_write(struct phylink *pl, unsigned int phy_id,
case MLO_AN_PHY:
return -EOPNOTSUPP;
- case MLO_AN_SGMII:
- /* No phy, fall through to 8023z method */
- case MLO_AN_8023Z:
+ case MLO_AN_INBAND:
break;
}
return 0;
}
+/**
+ * phylink_mii_ioctl() - generic mii ioctl interface
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @ifr: a pointer to a &struct ifreq for socket ioctls
+ * @cmd: ioctl cmd to execute
+ *
+ * Perform the specified MII ioctl on the PHY attached to the phylink instance
+ * specified by @pl. If no PHY is attached, emulate the presence of the PHY.
+ *
+ * Returns: zero on success or negative error code.
+ *
+ * %SIOCGMIIPHY:
+ * read register from the current PHY.
+ * %SIOCGMIIREG:
+ * read register from the specified PHY.
+ * %SIOCSMIIREG:
+ * set a register on the specified PHY.
+ */
int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
{
struct mii_ioctl_data *mii = if_mii(ifr);
@@ -1290,7 +1516,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
WARN_ON(!lockdep_rtnl_is_held());
if (pl->phydev) {
- /* PHYs only exist for MLO_AN_PHY and MLO_AN_SGMII */
+ /* PHYs only exist for MLO_AN_PHY and SGMII */
switch (cmd) {
case SIOCGMIIPHY:
mii->phy_id = pl->phydev->mdio.addr;
@@ -1359,10 +1585,10 @@ static int phylink_sfp_module_insert(void *upstream,
switch (iface) {
case PHY_INTERFACE_MODE_SGMII:
- mode = MLO_AN_SGMII;
- break;
case PHY_INTERFACE_MODE_1000BASEX:
- mode = MLO_AN_8023Z;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ case PHY_INTERFACE_MODE_10GKR:
+ mode = MLO_AN_INBAND;
break;
default:
return -EINVAL;
@@ -1389,7 +1615,7 @@ static int phylink_sfp_module_insert(void *upstream,
phylink_an_mode_str(mode), phy_modes(config.interface),
__ETHTOOL_LINK_MODE_MASK_NBITS, support);
- if (mode == MLO_AN_8023Z && pl->phydev)
+ if (phy_interface_mode_is_8023z(iface) && pl->phydev)
return -EINVAL;
changed = !bitmap_equal(pl->supported, support,
diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c
index dbef8002bc28..889a4dce1648 100644
--- a/drivers/net/phy/qsemi.c
+++ b/drivers/net/phy/qsemi.c
@@ -118,8 +118,6 @@ static struct phy_driver qs6612_driver[] = { {
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = qs6612_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = qs6612_ack_interrupt,
.config_intr = qs6612_config_intr,
} };
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index eda0a6e86918..7c1bf688dd48 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -13,29 +13,67 @@
* option) any later version.
*
*/
+#include <linux/bitops.h>
#include <linux/phy.h>
#include <linux/module.h>
-#define RTL821x_PHYSR 0x11
-#define RTL821x_PHYSR_DUPLEX 0x2000
-#define RTL821x_PHYSR_SPEED 0xc000
-#define RTL821x_INER 0x12
-#define RTL821x_INER_INIT 0x6400
-#define RTL821x_INSR 0x13
-#define RTL821x_PAGE_SELECT 0x1f
-#define RTL8211E_INER_LINK_STATUS 0x400
+#define RTL821x_PHYSR 0x11
+#define RTL821x_PHYSR_DUPLEX BIT(13)
+#define RTL821x_PHYSR_SPEED GENMASK(15, 14)
-#define RTL8211F_INER_LINK_STATUS 0x0010
-#define RTL8211F_INSR 0x1d
-#define RTL8211F_TX_DELAY 0x100
+#define RTL821x_INER 0x12
+#define RTL8211B_INER_INIT 0x6400
+#define RTL8211E_INER_LINK_STATUS BIT(10)
+#define RTL8211F_INER_LINK_STATUS BIT(4)
-#define RTL8201F_ISR 0x1e
-#define RTL8201F_IER 0x13
+#define RTL821x_INSR 0x13
+
+#define RTL821x_PAGE_SELECT 0x1f
+
+#define RTL8211F_INSR 0x1d
+
+#define RTL8211F_TX_DELAY BIT(8)
+
+#define RTL8201F_ISR 0x1e
+#define RTL8201F_IER 0x13
MODULE_DESCRIPTION("Realtek PHY driver");
MODULE_AUTHOR("Johnson Leung");
MODULE_LICENSE("GPL");
+static int rtl8211x_page_read(struct phy_device *phydev, u16 page, u16 address)
+{
+ int ret;
+
+ ret = phy_write(phydev, RTL821x_PAGE_SELECT, page);
+ if (ret)
+ return ret;
+
+ ret = phy_read(phydev, address);
+
+ /* restore to default page 0 */
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
+
+ return ret;
+}
+
+static int rtl8211x_page_write(struct phy_device *phydev, u16 page,
+ u16 address, u16 val)
+{
+ int ret;
+
+ ret = phy_write(phydev, RTL821x_PAGE_SELECT, page);
+ if (ret)
+ return ret;
+
+ ret = phy_write(phydev, address, val);
+
+ /* restore to default page 0 */
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
+
+ return ret;
+}
+
static int rtl8201_ack_interrupt(struct phy_device *phydev)
{
int err;
@@ -58,31 +96,21 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev)
{
int err;
- phy_write(phydev, RTL821x_PAGE_SELECT, 0xa43);
- err = phy_read(phydev, RTL8211F_INSR);
- /* restore to default page 0 */
- phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
+ err = rtl8211x_page_read(phydev, 0xa43, RTL8211F_INSR);
return (err < 0) ? err : 0;
}
static int rtl8201_config_intr(struct phy_device *phydev)
{
- int err;
-
- /* switch to page 7 */
- phy_write(phydev, RTL821x_PAGE_SELECT, 0x7);
+ u16 val;
if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
- err = phy_write(phydev, RTL8201F_IER,
- BIT(13) | BIT(12) | BIT(11));
+ val = BIT(13) | BIT(12) | BIT(11);
else
- err = phy_write(phydev, RTL8201F_IER, 0);
-
- /* restore to default page 0 */
- phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
+ val = 0;
- return err;
+ return rtl8211x_page_write(phydev, 0x7, RTL8201F_IER, val);
}
static int rtl8211b_config_intr(struct phy_device *phydev)
@@ -91,7 +119,7 @@ static int rtl8211b_config_intr(struct phy_device *phydev)
if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
err = phy_write(phydev, RTL821x_INER,
- RTL821x_INER_INIT);
+ RTL8211B_INER_INIT);
else
err = phy_write(phydev, RTL821x_INER, 0);
@@ -113,41 +141,41 @@ static int rtl8211e_config_intr(struct phy_device *phydev)
static int rtl8211f_config_intr(struct phy_device *phydev)
{
- int err;
+ u16 val;
- phy_write(phydev, RTL821x_PAGE_SELECT, 0xa42);
if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
- err = phy_write(phydev, RTL821x_INER,
- RTL8211F_INER_LINK_STATUS);
+ val = RTL8211F_INER_LINK_STATUS;
else
- err = phy_write(phydev, RTL821x_INER, 0);
- phy_write(phydev, RTL821x_PAGE_SELECT, 0);
+ val = 0;
- return err;
+ return rtl8211x_page_write(phydev, 0xa42, RTL821x_INER, val);
}
static int rtl8211f_config_init(struct phy_device *phydev)
{
int ret;
- u16 reg;
+ u16 val;
ret = genphy_config_init(phydev);
if (ret < 0)
return ret;
- phy_write(phydev, RTL821x_PAGE_SELECT, 0xd08);
- reg = phy_read(phydev, 0x11);
+ ret = rtl8211x_page_read(phydev, 0xd08, 0x11);
+ if (ret < 0)
+ return ret;
+
+ val = ret & 0xffff;
/* enable TX-delay for rgmii-id and rgmii-txid, otherwise disable it */
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
- reg |= RTL8211F_TX_DELAY;
+ val |= RTL8211F_TX_DELAY;
else
- reg &= ~RTL8211F_TX_DELAY;
+ val &= ~RTL8211F_TX_DELAY;
- phy_write(phydev, 0x11, reg);
- /* restore to default page 0 */
- phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
+ ret = rtl8211x_page_write(phydev, 0xd08, 0x11, val);
+ if (ret)
+ return ret;
return 0;
}
@@ -159,16 +187,12 @@ static struct phy_driver realtek_drvs[] = {
.phy_id_mask = 0x0000ffff,
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
}, {
.phy_id = 0x001cc816,
.name = "RTL8201F 10/100Mbps Ethernet",
.phy_id_mask = 0x001fffff,
.features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &rtl8201_ack_interrupt,
.config_intr = &rtl8201_config_intr,
.suspend = genphy_suspend,
@@ -179,8 +203,6 @@ static struct phy_driver realtek_drvs[] = {
.phy_id_mask = 0x001fffff,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &rtl821x_ack_interrupt,
.config_intr = &rtl8211b_config_intr,
}, {
@@ -189,8 +211,6 @@ static struct phy_driver realtek_drvs[] = {
.phy_id_mask = 0x001fffff,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = rtl821x_ack_interrupt,
.config_intr = rtl8211e_config_intr,
.suspend = genphy_suspend,
@@ -201,8 +221,6 @@ static struct phy_driver realtek_drvs[] = {
.phy_id_mask = 0x001fffff,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &rtl821x_ack_interrupt,
.config_intr = &rtl8211e_config_intr,
.suspend = genphy_suspend,
@@ -213,9 +231,7 @@ static struct phy_driver realtek_drvs[] = {
.phy_id_mask = 0x001fffff,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .config_aneg = &genphy_config_aneg,
.config_init = &rtl8211f_config_init,
- .read_status = &genphy_read_status,
.ack_interrupt = &rtl8211f_ack_interrupt,
.config_intr = &rtl8211f_config_intr,
.suspend = genphy_suspend,
diff --git a/drivers/net/phy/rockchip.c b/drivers/net/phy/rockchip.c
index c092af137056..f1da70b9b55f 100644
--- a/drivers/net/phy/rockchip.c
+++ b/drivers/net/phy/rockchip.c
@@ -213,7 +213,6 @@ static struct phy_driver rockchip_phy_driver[] = {
.soft_reset = genphy_soft_reset,
.config_init = rockchip_integrated_phy_config_init,
.config_aneg = rockchip_config_aneg,
- .read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = rockchip_phy_resume,
},
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8a1b1f4c1b7c..1356dba0d9d3 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -8,10 +8,14 @@
#include "sfp.h"
+/**
+ * struct sfp_bus - internal representation of a sfp bus
+ */
struct sfp_bus {
+ /* private: */
struct kref kref;
struct list_head node;
- struct device_node *device_node;
+ struct fwnode_handle *fwnode;
const struct sfp_socket_ops *socket_ops;
struct device *sfp_dev;
@@ -26,6 +30,20 @@ struct sfp_bus {
bool started;
};
+/**
+ * sfp_parse_port() - Parse the EEPROM base ID, setting the port type
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @support: optional pointer to an array of unsigned long for the
+ * ethtool support mask
+ *
+ * Parse the EEPROM identification given in @id, and return one of
+ * %PORT_TP, %PORT_FIBRE or %PORT_OTHER. If @support is non-%NULL,
+ * also set the ethtool %ETHTOOL_LINK_MODE_xxx_BIT corresponding with
+ * the connector type.
+ *
+ * If the port type is not known, returns %PORT_OTHER.
+ */
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
@@ -78,6 +96,24 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
}
EXPORT_SYMBOL_GPL(sfp_parse_port);
+/**
+ * sfp_parse_interface() - Parse the phy_interface_t
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM. There is no standard or defined way
+ * to derive this information, so we use some heuristics.
+ *
+ * If the encoding is 64b66b, then the module must be >= 10G, so
+ * return %PHY_INTERFACE_MODE_10GKR.
+ *
+ * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
+ * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
+ * %PHY_INTERFACE_MODE_SGMII mode.
+ *
+ * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
+ */
phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
const struct sfp_eeprom_id *id)
{
@@ -117,6 +153,15 @@ phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
}
EXPORT_SYMBOL_GPL(sfp_parse_interface);
+/**
+ * sfp_parse_support() - Parse the eeprom id for supported link modes
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @support: pointer to an array of unsigned long for the ethtool support mask
+ *
+ * Parse the EEPROM identification information and derive the supported
+ * ethtool link modes for the module.
+ */
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
@@ -215,7 +260,7 @@ static const struct sfp_upstream_ops *sfp_get_upstream_ops(struct sfp_bus *bus)
return bus->registered ? bus->upstream_ops : NULL;
}
-static struct sfp_bus *sfp_bus_get(struct device_node *np)
+static struct sfp_bus *sfp_bus_get(struct fwnode_handle *fwnode)
{
struct sfp_bus *sfp, *new, *found = NULL;
@@ -224,7 +269,7 @@ static struct sfp_bus *sfp_bus_get(struct device_node *np)
mutex_lock(&sfp_mutex);
list_for_each_entry(sfp, &sfp_buses, node) {
- if (sfp->device_node == np) {
+ if (sfp->fwnode == fwnode) {
kref_get(&sfp->kref);
found = sfp;
break;
@@ -233,7 +278,7 @@ static struct sfp_bus *sfp_bus_get(struct device_node *np)
if (!found && new) {
kref_init(&new->kref);
- new->device_node = np;
+ new->fwnode = fwnode;
list_add(&new->node, &sfp_buses);
found = new;
new = NULL;
@@ -246,7 +291,7 @@ static struct sfp_bus *sfp_bus_get(struct device_node *np)
return found;
}
-static void sfp_bus_release(struct kref *kref) __releases(sfp_mutex)
+static void sfp_bus_release(struct kref *kref)
{
struct sfp_bus *bus = container_of(kref, struct sfp_bus, kref);
@@ -293,6 +338,16 @@ static void sfp_unregister_bus(struct sfp_bus *bus)
bus->registered = false;
}
+/**
+ * sfp_get_module_info() - Get the ethtool_modinfo for a SFP module
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @modinfo: a &struct ethtool_modinfo
+ *
+ * Fill in the type and eeprom_len parameters in @modinfo for a module on
+ * the sfp bus specified by @bus.
+ *
+ * Returns 0 on success or a negative errno number.
+ */
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo)
{
if (!bus->registered)
@@ -301,6 +356,17 @@ int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo)
}
EXPORT_SYMBOL_GPL(sfp_get_module_info);
+/**
+ * sfp_get_module_eeprom() - Read the SFP module EEPROM
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @ee: a &struct ethtool_eeprom
+ * @data: buffer to contain the EEPROM data (must be at least @ee->len bytes)
+ *
+ * Read the EEPROM as specified by the supplied @ee. See the documentation
+ * for &struct ethtool_eeprom for the region to be read.
+ *
+ * Returns 0 on success or a negative errno number.
+ */
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
u8 *data)
{
@@ -310,6 +376,15 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
}
EXPORT_SYMBOL_GPL(sfp_get_module_eeprom);
+/**
+ * sfp_upstream_start() - Inform the SFP that the network device is up
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ *
+ * Inform the SFP socket that the network device is now up, so that the
+ * module can be enabled by allowing TX_DISABLE to be deasserted. This
+ * should be called from the network device driver's &struct net_device_ops
+ * ndo_open() method.
+ */
void sfp_upstream_start(struct sfp_bus *bus)
{
if (bus->registered)
@@ -318,6 +393,15 @@ void sfp_upstream_start(struct sfp_bus *bus)
}
EXPORT_SYMBOL_GPL(sfp_upstream_start);
+/**
+ * sfp_upstream_stop() - Inform the SFP that the network device is down
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ *
+ * Inform the SFP socket that the network device is now up, so that the
+ * module can be disabled by asserting TX_DISABLE, disabling the laser
+ * in optical modules. This should be called from the network device
+ * driver's &struct net_device_ops ndo_stop() method.
+ */
void sfp_upstream_stop(struct sfp_bus *bus)
{
if (bus->registered)
@@ -326,11 +410,24 @@ void sfp_upstream_stop(struct sfp_bus *bus)
}
EXPORT_SYMBOL_GPL(sfp_upstream_stop);
-struct sfp_bus *sfp_register_upstream(struct device_node *np,
+/**
+ * sfp_register_upstream() - Register the neighbouring device
+ * @np: device node for the SFP bus
+ * @ndev: network device associated with the interface
+ * @upstream: the upstream private data
+ * @ops: the upstream's &struct sfp_upstream_ops
+ *
+ * Register the upstream device (eg, PHY) with the SFP bus. MAC drivers
+ * should use phylink, which will call this function for them. Returns
+ * a pointer to the allocated &struct sfp_bus.
+ *
+ * On error, returns %NULL.
+ */
+struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode,
struct net_device *ndev, void *upstream,
const struct sfp_upstream_ops *ops)
{
- struct sfp_bus *bus = sfp_bus_get(np);
+ struct sfp_bus *bus = sfp_bus_get(fwnode);
int ret = 0;
if (bus) {
@@ -353,6 +450,13 @@ struct sfp_bus *sfp_register_upstream(struct device_node *np,
}
EXPORT_SYMBOL_GPL(sfp_register_upstream);
+/**
+ * sfp_unregister_upstream() - Unregister sfp bus
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ *
+ * Unregister a previously registered upstream connection for the SFP
+ * module. @bus is returned from sfp_register_upstream().
+ */
void sfp_unregister_upstream(struct sfp_bus *bus)
{
rtnl_lock();
@@ -433,7 +537,7 @@ EXPORT_SYMBOL_GPL(sfp_module_remove);
struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp,
const struct sfp_socket_ops *ops)
{
- struct sfp_bus *bus = sfp_bus_get(dev->of_node);
+ struct sfp_bus *bus = sfp_bus_get(dev->fwnode);
int ret = 0;
if (bus) {
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 9dfc1c4c954f..96511557eb2c 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -98,12 +98,18 @@ static const enum gpiod_flags gpio_flags[] = {
static DEFINE_MUTEX(sfp_mutex);
+struct sff_data {
+ unsigned int gpios;
+ bool (*module_supported)(const struct sfp_eeprom_id *id);
+};
+
struct sfp {
struct device *dev;
struct i2c_adapter *i2c;
struct mii_bus *i2c_mii;
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
+ const struct sff_data *type;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
@@ -123,6 +129,36 @@ struct sfp {
struct sfp_eeprom_id id;
};
+static bool sff_module_supported(const struct sfp_eeprom_id *id)
+{
+ return id->base.phys_id == SFP_PHYS_ID_SFF &&
+ id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP;
+}
+
+static const struct sff_data sff_data = {
+ .gpios = SFP_F_LOS | SFP_F_TX_FAULT | SFP_F_TX_DISABLE,
+ .module_supported = sff_module_supported,
+};
+
+static bool sfp_module_supported(const struct sfp_eeprom_id *id)
+{
+ return id->base.phys_id == SFP_PHYS_ID_SFP &&
+ id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP;
+}
+
+static const struct sff_data sfp_data = {
+ .gpios = SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT |
+ SFP_F_TX_DISABLE | SFP_F_RATE_SELECT,
+ .module_supported = sfp_module_supported,
+};
+
+static const struct of_device_id sfp_of_match[] = {
+ { .compatible = "sff,sff", .data = &sff_data, },
+ { .compatible = "sff,sfp", .data = &sfp_data, },
+ { },
+};
+MODULE_DEVICE_TABLE(of, sfp_of_match);
+
static unsigned long poll_jiffies;
static unsigned int sfp_gpio_get_state(struct sfp *sfp)
@@ -141,6 +177,11 @@ static unsigned int sfp_gpio_get_state(struct sfp *sfp)
return state;
}
+static unsigned int sff_gpio_get_state(struct sfp *sfp)
+{
+ return sfp_gpio_get_state(sfp) | SFP_F_PRESENT;
+}
+
static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
{
if (state & SFP_F_PRESENT) {
@@ -479,10 +520,10 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
dev_info(sfp->dev, "module %s %s rev %s sn %s dc %s\n",
vendor, part, rev, sn, date);
- /* We only support SFP modules, not the legacy GBIC modules. */
- if (sfp->id.base.phys_id != SFP_PHYS_ID_SFP ||
- sfp->id.base.phys_ext_id != SFP_PHYS_EXT_ID_SFP) {
- dev_err(sfp->dev, "module is not SFP - phys id 0x%02x 0x%02x\n",
+ /* Check whether we support this module */
+ if (!sfp->type->module_supported(&sfp->id)) {
+ dev_err(sfp->dev,
+ "module is not supported - phys id 0x%02x 0x%02x\n",
sfp->id.base.phys_id, sfp->id.base.phys_ext_id);
return -EINVAL;
}
@@ -801,6 +842,7 @@ static void sfp_cleanup(void *data)
static int sfp_probe(struct platform_device *pdev)
{
+ const struct sff_data *sff;
struct sfp *sfp;
bool poll = false;
int irq, err, i;
@@ -815,10 +857,19 @@ static int sfp_probe(struct platform_device *pdev)
if (err < 0)
return err;
+ sff = sfp->type = &sfp_data;
+
if (pdev->dev.of_node) {
struct device_node *node = pdev->dev.of_node;
+ const struct of_device_id *id;
struct device_node *np;
+ id = of_match_node(sfp_of_match, node);
+ if (WARN_ON(!id))
+ return -EINVAL;
+
+ sff = sfp->type = id->data;
+
np = of_parse_phandle(node, "i2c-bus", 0);
if (np) {
struct i2c_adapter *i2c;
@@ -834,17 +885,22 @@ static int sfp_probe(struct platform_device *pdev)
return err;
}
}
+ }
- for (i = 0; i < GPIO_MAX; i++) {
+ for (i = 0; i < GPIO_MAX; i++)
+ if (sff->gpios & BIT(i)) {
sfp->gpio[i] = devm_gpiod_get_optional(sfp->dev,
gpio_of_names[i], gpio_flags[i]);
if (IS_ERR(sfp->gpio[i]))
return PTR_ERR(sfp->gpio[i]);
}
- sfp->get_state = sfp_gpio_get_state;
- sfp->set_state = sfp_gpio_set_state;
- }
+ sfp->get_state = sfp_gpio_get_state;
+ sfp->set_state = sfp_gpio_set_state;
+
+ /* Modules that have no detect signal are always present */
+ if (!(sfp->gpio[GPIO_MODDEF0]))
+ sfp->get_state = sff_gpio_get_state;
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
@@ -899,12 +955,6 @@ static int sfp_remove(struct platform_device *pdev)
return 0;
}
-static const struct of_device_id sfp_of_match[] = {
- { .compatible = "sff,sfp", },
- { },
-};
-MODULE_DEVICE_TABLE(of, sfp_of_match);
-
static struct platform_driver sfp_driver = {
.probe = sfp_probe,
.remove = sfp_remove,
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 2306bfae057f..be399d645224 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -227,8 +227,6 @@ static struct phy_driver smsc_phy_driver[] = {
.probe = smsc_phy_probe,
/* basic functions */
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
@@ -249,8 +247,6 @@ static struct phy_driver smsc_phy_driver[] = {
.probe = smsc_phy_probe,
/* basic functions */
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
@@ -276,7 +272,6 @@ static struct phy_driver smsc_phy_driver[] = {
.probe = smsc_phy_probe,
/* basic functions */
- .config_aneg = genphy_config_aneg,
.read_status = lan87xx_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
@@ -303,8 +298,6 @@ static struct phy_driver smsc_phy_driver[] = {
.probe = smsc_phy_probe,
/* basic functions */
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.config_init = lan911x_config_init,
/* IRQ related */
@@ -319,12 +312,11 @@ static struct phy_driver smsc_phy_driver[] = {
.name = "SMSC LAN8710/LAN8720",
.features = PHY_BASIC_FEATURES,
- .flags = PHY_HAS_INTERRUPT,
+ .flags = PHY_HAS_INTERRUPT | PHY_RST_AFTER_CLK_EN,
.probe = smsc_phy_probe,
/* basic functions */
- .config_aneg = genphy_config_aneg,
.read_status = lan87xx_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
@@ -351,7 +343,6 @@ static struct phy_driver smsc_phy_driver[] = {
.probe = smsc_phy_probe,
/* basic functions */
- .config_aneg = genphy_config_aneg,
.read_status = lan87xx_read_status,
.config_init = smsc_phy_config_init,
.soft_reset = smsc_phy_reset,
diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c
index d00cfb64529e..fbd548a1ad84 100644
--- a/drivers/net/phy/ste10Xp.c
+++ b/drivers/net/phy/ste10Xp.c
@@ -89,8 +89,6 @@ static struct phy_driver ste10xp_pdriver[] = {
.features = PHY_BASIC_FEATURES | SUPPORTED_Pause,
.flags = PHY_HAS_INTERRUPT,
.config_init = ste10Xp_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = ste10Xp_ack_interrupt,
.config_intr = ste10Xp_config_intr,
.suspend = genphy_suspend,
@@ -102,8 +100,6 @@ static struct phy_driver ste10xp_pdriver[] = {
.features = PHY_BASIC_FEATURES | SUPPORTED_Pause,
.flags = PHY_HAS_INTERRUPT,
.config_init = ste10Xp_config_init,
- .config_aneg = genphy_config_aneg,
- .read_status = genphy_read_status,
.ack_interrupt = ste10Xp_ack_interrupt,
.config_intr = ste10Xp_config_intr,
.suspend = genphy_suspend,
diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c
index 96b33475ea5e..55f48ee3595a 100644
--- a/drivers/net/phy/uPD60620.c
+++ b/drivers/net/phy/uPD60620.c
@@ -95,7 +95,6 @@ static struct phy_driver upd60620_driver[1] = { {
.features = PHY_BASIC_FEATURES,
.flags = 0,
.config_init = upd60620_config_init,
- .config_aneg = genphy_config_aneg,
.read_status = upd60620_read_status,
} };
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index f78ff0279648..d9dd8fbfffc7 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -267,7 +267,6 @@ static struct phy_driver vsc82xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -278,7 +277,6 @@ static struct phy_driver vsc82xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -289,7 +287,6 @@ static struct phy_driver vsc82xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -300,7 +297,6 @@ static struct phy_driver vsc82xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -311,7 +307,6 @@ static struct phy_driver vsc82xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -321,8 +316,6 @@ static struct phy_driver vsc82xx_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc8601_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -333,7 +326,6 @@ static struct phy_driver vsc82xx_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc824x_config_init,
.config_aneg = &vsc82x4_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -344,8 +336,6 @@ static struct phy_driver vsc82xx_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc8221_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
}, {
@@ -356,8 +346,6 @@ static struct phy_driver vsc82xx_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &vsc8221_config_init,
- .config_aneg = &genphy_config_aneg,
- .read_status = &genphy_read_status,
.ack_interrupt = &vsc824x_ack_interrupt,
.config_intr = &vsc82xx_config_intr,
} };
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index cc63102ca96e..8940417c30e5 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -731,7 +731,7 @@ static void sl_sync(void)
/* Find a free SLIP channel, and link in this `tty' line. */
-static struct slip *sl_alloc(dev_t line)
+static struct slip *sl_alloc(void)
{
int i;
char name[IFNAMSIZ];
@@ -809,7 +809,7 @@ static int slip_open(struct tty_struct *tty)
/* OK. Find a free SLIP channel to use. */
err = -ENFILE;
- sl = sl_alloc(tty_devnum(tty));
+ sl = sl_alloc();
if (sl == NULL)
goto err_exit;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4f4a842a1c9c..e367d6310353 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -195,6 +195,11 @@ struct tun_flow_entry {
#define TUN_NUM_FLOW_ENTRIES 1024
+struct tun_steering_prog {
+ struct rcu_head rcu;
+ struct bpf_prog *prog;
+};
+
/* Since the socket were moved to tun_file, to preserve the behavior of persist
* device, socket filter, sndbuf and vnet header size were restore when the
* file were attached to a persist device.
@@ -232,6 +237,7 @@ struct tun_struct {
u32 rx_batched;
struct tun_pcpu_stats __percpu *pcpu_stats;
struct bpf_prog __rcu *xdp_prog;
+ struct tun_steering_prog __rcu *steering_prog;
};
static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -537,15 +543,12 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
* different rxq no. here. If we could not get rxhash, then we would
* hope the rxq no. may help here.
*/
-static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
- void *accel_priv, select_queue_fallback_t fallback)
+static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{
- struct tun_struct *tun = netdev_priv(dev);
struct tun_flow_entry *e;
u32 txq = 0;
u32 numqueues = 0;
- rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
txq = __skb_get_hash_symmetric(skb);
@@ -563,10 +566,37 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
txq -= numqueues;
}
- rcu_read_unlock();
return txq;
}
+static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+{
+ struct tun_steering_prog *prog;
+ u16 ret = 0;
+
+ prog = rcu_dereference(tun->steering_prog);
+ if (prog)
+ ret = bpf_prog_run_clear_cb(prog->prog, skb);
+
+ return ret % tun->numqueues;
+}
+
+static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
+ void *accel_priv, select_queue_fallback_t fallback)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ u16 ret;
+
+ rcu_read_lock();
+ if (rcu_dereference(tun->steering_prog))
+ ret = tun_ebpf_select_queue(tun, skb);
+ else
+ ret = tun_automq_select_queue(tun, skb);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static inline bool tun_not_capable(struct tun_struct *tun)
{
const struct cred *cred = current_cred();
@@ -673,7 +703,6 @@ static void tun_detach(struct tun_file *tfile, bool clean)
static void tun_detach_all(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
- struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
struct tun_file *tfile, *tmp;
int i, n = tun->numqueues;
@@ -708,9 +737,6 @@ static void tun_detach_all(struct net_device *dev)
}
BUG_ON(tun->numdisabled != 0);
- if (xdp_prog)
- bpf_prog_put(xdp_prog);
-
if (tun->flags & IFF_PERSIST)
module_put(THIS_MODULE);
}
@@ -937,23 +963,10 @@ static int tun_net_close(struct net_device *dev)
}
/* Net device start xmit */
-static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
{
- struct tun_struct *tun = netdev_priv(dev);
- int txq = skb->queue_mapping;
- struct tun_file *tfile;
- u32 numqueues = 0;
-
- rcu_read_lock();
- tfile = rcu_dereference(tun->tfiles[txq]);
- numqueues = READ_ONCE(tun->numqueues);
-
- /* Drop packet if interface is not attached */
- if (txq >= numqueues)
- goto drop;
-
#ifdef CONFIG_RPS
- if (numqueues == 1 && static_key_false(&rps_needed)) {
+ if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
/* Select queue was not called for the skbuff, so we extract the
* RPS hash and save it into the flow_table here.
*/
@@ -969,6 +982,24 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
#endif
+}
+
+/* Net device start xmit */
+static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ int txq = skb->queue_mapping;
+ struct tun_file *tfile;
+
+ rcu_read_lock();
+ tfile = rcu_dereference(tun->tfiles[txq]);
+
+ /* Drop packet if interface is not attached */
+ if (txq >= tun->numqueues)
+ goto drop;
+
+ if (!rcu_dereference(tun->steering_prog))
+ tun_automq_xmit(tun, skb);
tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
@@ -1551,7 +1582,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
int copylen;
bool zerocopy = false;
int err;
- u32 rxhash;
+ u32 rxhash = 0;
int skb_xdp = 1;
bool frags = tun_napi_frags_enabled(tun);
@@ -1739,7 +1770,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
rcu_read_unlock();
}
- rxhash = __skb_get_hash_symmetric(skb);
+ rcu_read_lock();
+ if (!rcu_dereference(tun->steering_prog))
+ rxhash = __skb_get_hash_symmetric(skb);
+ rcu_read_unlock();
if (frags) {
/* Exercise flow dissector code path. */
@@ -1783,7 +1817,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
u64_stats_update_end(&stats->syncp);
put_cpu_ptr(stats);
- tun_flow_update(tun, rxhash, tfile);
+ if (rxhash)
+ tun_flow_update(tun, rxhash, tfile);
+
return total_len;
}
@@ -1991,6 +2027,39 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
+static void tun_steering_prog_free(struct rcu_head *rcu)
+{
+ struct tun_steering_prog *prog = container_of(rcu,
+ struct tun_steering_prog, rcu);
+
+ bpf_prog_destroy(prog->prog);
+ kfree(prog);
+}
+
+static int __tun_set_steering_ebpf(struct tun_struct *tun,
+ struct bpf_prog *prog)
+{
+ struct tun_steering_prog *old, *new = NULL;
+
+ if (prog) {
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+ new->prog = prog;
+ }
+
+ spin_lock_bh(&tun->lock);
+ old = rcu_dereference_protected(tun->steering_prog,
+ lockdep_is_held(&tun->lock));
+ rcu_assign_pointer(tun->steering_prog, new);
+ spin_unlock_bh(&tun->lock);
+
+ if (old)
+ call_rcu(&old->rcu, tun_steering_prog_free);
+
+ return 0;
+}
+
static void tun_free_netdev(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -1999,6 +2068,7 @@ static void tun_free_netdev(struct net_device *dev)
free_percpu(tun->pcpu_stats);
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
+ __tun_set_steering_ebpf(tun, NULL);
}
static void tun_setup(struct net_device *dev)
@@ -2287,6 +2357,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
tun->filter_attached = false;
tun->sndbuf = tfile->socket.sk->sk_sndbuf;
tun->rx_batched = 0;
+ RCU_INIT_POINTER(tun->steering_prog, NULL);
tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
if (!tun->pcpu_stats) {
@@ -2479,6 +2550,25 @@ unlock:
return ret;
}
+static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+{
+ struct bpf_prog *prog;
+ int fd;
+
+ if (copy_from_user(&fd, data, sizeof(fd)))
+ return -EFAULT;
+
+ if (fd == -1) {
+ prog = NULL;
+ } else {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ return __tun_set_steering_ebpf(tun, prog);
+}
+
static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg, int ifreq_len)
{
@@ -2755,6 +2845,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = 0;
break;
+ case TUNSETSTEERINGEBPF:
+ ret = tun_set_steering_ebpf(tun, argp);
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3000ddd1c7e2..cfaa07f230e5 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -826,7 +826,7 @@ err:
static const struct driver_info qmi_wwan_info = {
.description = "WWAN/QMI device",
- .flags = FLAG_WWAN,
+ .flags = FLAG_WWAN | FLAG_SEND_ZLP,
.bind = qmi_wwan_bind,
.unbind = qmi_wwan_unbind,
.manage_power = qmi_wwan_manage_power,
@@ -835,7 +835,7 @@ static const struct driver_info qmi_wwan_info = {
static const struct driver_info qmi_wwan_info_quirk_dtr = {
.description = "WWAN/QMI device",
- .flags = FLAG_WWAN,
+ .flags = FLAG_WWAN | FLAG_SEND_ZLP,
.bind = qmi_wwan_bind,
.unbind = qmi_wwan_unbind,
.manage_power = qmi_wwan_manage_power,
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index f5438d0978ca..a69ad39ee57e 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -410,6 +410,9 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
if (ifmp && (dev->ifindex != 0))
peer->ifindex = ifmp->ifi_index;
+ peer->gso_max_size = dev->gso_max_size;
+ peer->gso_max_segs = dev->gso_max_segs;
+
err = register_netdevice(peer);
put_net(net);
net = NULL;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 559b215c0169..6fb7b658a6cc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -261,9 +261,12 @@ static void virtqueue_napi_complete(struct napi_struct *napi,
int opaque;
opaque = virtqueue_enable_cb_prepare(vq);
- if (napi_complete_done(napi, processed) &&
- unlikely(virtqueue_poll(vq, opaque)))
- virtqueue_napi_schedule(napi, vq);
+ if (napi_complete_done(napi, processed)) {
+ if (unlikely(virtqueue_poll(vq, opaque)))
+ virtqueue_napi_schedule(napi, vq);
+ } else {
+ virtqueue_disable_cb(vq);
+ }
}
static void skb_xmit_done(struct virtqueue *vq)
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 9c51b8be0038..5ba222920e80 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.4.a.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.11.0-k"
/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01040a00
+#define VMXNET3_DRIVER_VERSION_NUM 0x01040b00
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
@@ -416,8 +416,8 @@ struct vmxnet3_adapter {
/* must be a multiple of VMXNET3_RING_SIZE_ALIGN */
#define VMXNET3_DEF_TX_RING_SIZE 512
-#define VMXNET3_DEF_RX_RING_SIZE 256
-#define VMXNET3_DEF_RX_RING2_SIZE 128
+#define VMXNET3_DEF_RX_RING_SIZE 1024
+#define VMXNET3_DEF_RX_RING2_SIZE 256
#define VMXNET3_DEF_RXDATA_DESC_SIZE 128
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 3481e69738b5..f4c73292b304 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -77,6 +77,10 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
if (of_property_read_bool(child, "broken-turn-around"))
mdio->phy_ignore_ta_mask |= 1 << addr;
+ of_property_read_u32(child, "reset-delay-us", &phy->mdio.reset_delay);
+ of_property_read_u32(child, "reset-post-delay-us",
+ &phy->mdio.reset_post_delay);
+
/* Associate the OF node with the device structure so it
* can be looked up later */
of_node_get(child);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4a7c6864fdf4..764ca7b8840d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1458,6 +1458,7 @@ struct pci_devres {
unsigned int pinned:1;
unsigned int orig_intx:1;
unsigned int restore_intx:1;
+ unsigned int mwi:1;
u32 region_mask;
};
@@ -1476,6 +1477,9 @@ static void pcim_release(struct device *gendev, void *res)
if (this->region_mask & (1 << i))
pci_release_region(dev, i);
+ if (this->mwi)
+ pci_clear_mwi(dev);
+
if (this->restore_intx)
pci_intx(dev, this->orig_intx);
@@ -3761,6 +3765,27 @@ int pci_set_mwi(struct pci_dev *dev)
EXPORT_SYMBOL(pci_set_mwi);
/**
+ * pcim_set_mwi - a device-managed pci_set_mwi()
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * Managed pci_set_mwi().
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int pcim_set_mwi(struct pci_dev *dev)
+{
+ struct pci_devres *dr;
+
+ dr = find_pci_dr(dev);
+ if (!dr)
+ return -ENOMEM;
+
+ dr->mwi = 1;
+ return pci_set_mwi(dev);
+}
+EXPORT_SYMBOL(pcim_set_mwi);
+
+/**
* pci_try_set_mwi - enables memory-write-invalidate PCI transaction
* @dev: the PCI device for which MWI is enabled
*
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 11066d8647d2..90af87ff29ba 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1549,16 +1549,13 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
rhashtable_walk_enter(&gl_hash_table, &iter);
do {
- gl = ERR_PTR(rhashtable_walk_start(&iter));
- if (IS_ERR(gl))
- goto walk_stop;
+ rhashtable_walk_start(&iter);
while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
if (gl->gl_name.ln_sbd == sdp &&
lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
-walk_stop:
rhashtable_walk_stop(&iter);
} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
@@ -1947,7 +1944,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
loff_t n = *pos;
rhashtable_walk_enter(&gl_hash_table, &gi->hti);
- if (rhashtable_walk_start(&gi->hti) != 0)
+ if (rhashtable_walk_start_check(&gi->hti) != 0)
return NULL;
do {
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 8ff86b4c1b8a..d3339dd48b1a 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -14,6 +14,7 @@
#define PHY_ID_BCM5241 0x0143bc30
#define PHY_ID_BCMAC131 0x0143bc70
#define PHY_ID_BCM5481 0x0143bca0
+#define PHY_ID_BCM5395 0x0143bcf0
#define PHY_ID_BCM54810 0x03625d00
#define PHY_ID_BCM5482 0x0143bcb0
#define PHY_ID_BCM5411 0x00206070
diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h
index f48a85c377de..b6514c29563f 100644
--- a/include/linux/dsa/lan9303.h
+++ b/include/linux/dsa/lan9303.h
@@ -26,6 +26,7 @@ struct lan9303 {
bool phy_addr_sel_strap;
struct dsa_switch *ds;
struct mutex indirect_mutex; /* protect indexed register access */
+ struct mutex alr_mutex; /* protect ALR access */
const struct lan9303_phy_ops *ops;
bool is_bridged; /* true if port 1 and 2 are bridged */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 80b5b482cb46..0062302e1285 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -985,6 +985,7 @@ struct bpf_sock_ops_kern {
u32 reply;
u32 replylong[4];
};
+ u32 is_fullsock;
};
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6c9336626592..93bd6fcd6e62 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -127,28 +127,6 @@ struct hv_ring_buffer_info {
u32 priv_read_index;
};
-/*
- *
- * hv_get_ringbuffer_availbytes()
- *
- * Get number of bytes available to read and to write to
- * for the specified ring buffer
- */
-static inline void
-hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
- u32 *read, u32 *write)
-{
- u32 read_loc, write_loc, dsize;
-
- /* Capture the read/write indices before they changed */
- read_loc = rbi->ring_buffer->read_index;
- write_loc = rbi->ring_buffer->write_index;
- dsize = rbi->ring_datasize;
-
- *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
- read_loc - write_loc;
- *read = dsize - *write;
-}
static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi)
{
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index bedf54b6f943..4cb7aeeafce0 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -30,10 +30,10 @@ struct macvlan_dev {
enum macvlan_mode mode;
u16 flags;
int nest_level;
+ unsigned int macaddr_count;
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *netpoll;
#endif
- unsigned int macaddr_count;
};
static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index ca08ab16ecdc..e37c21d8eb19 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -12,6 +12,7 @@
#include <uapi/linux/mdio.h>
#include <linux/mod_devicetable.h>
+struct gpio_desc;
struct mii_bus;
/* Multiple levels of nesting are possible. However typically this is
@@ -39,6 +40,9 @@ struct mdio_device {
/* Bus address of the MDIO device (0-31) */
int addr;
int flags;
+ struct gpio_desc *reset;
+ unsigned int reset_delay;
+ unsigned int reset_post_delay;
};
#define to_mdio_device(d) container_of(d, struct mdio_device, dev)
@@ -71,6 +75,7 @@ void mdio_device_free(struct mdio_device *mdiodev);
struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr);
int mdio_device_register(struct mdio_device *mdiodev);
void mdio_device_remove(struct mdio_device *mdiodev);
+void mdio_device_reset(struct mdio_device *mdiodev, int value);
int mdio_driver_register(struct mdio_driver *drv);
void mdio_driver_unregister(struct mdio_driver *drv);
int mdio_device_bus_match(struct device *dev, struct device_driver *drv);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ef789e1d679e..cc4ce7456e38 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -820,6 +820,8 @@ struct netdev_bpf {
struct {
u8 prog_attached;
u32 prog_id;
+ /* flags with which program was installed */
+ u32 prog_flags;
};
/* BPF_OFFLOAD_VERIFIER_PREP */
struct {
@@ -3330,7 +3332,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, u32 flags);
-u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t xdp_op, u32 *prog_id);
+void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
+ struct netdev_bpf *xdp);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c170c9250c8b..0314e0716c30 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1072,6 +1072,7 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
int pci_set_cacheline_size(struct pci_dev *dev);
#define HAVE_PCI_SET_MWI
int __must_check pci_set_mwi(struct pci_dev *dev);
+int __must_check pcim_set_mwi(struct pci_dev *dev);
int pci_try_set_mwi(struct pci_dev *dev);
void pci_clear_mwi(struct pci_dev *dev);
void pci_intx(struct pci_dev *dev, int enable);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index dc82a07cb4fd..c4b4715caa21 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -59,6 +59,7 @@
#define PHY_HAS_INTERRUPT 0x00000001
#define PHY_IS_INTERNAL 0x00000002
+#define PHY_RST_AFTER_CLK_EN 0x00000004
#define MDIO_DEVICE_IS_PHY 0x80000000
/* Interface Mode definitions */
@@ -468,7 +469,6 @@ struct phy_device {
/* Interrupt and Polling infrastructure */
struct work_struct phy_queue;
struct delayed_work state_queue;
- atomic_t irq_disable;
struct mutex lock;
@@ -497,19 +497,19 @@ struct phy_device {
* flags: A bitfield defining certain other features this PHY
* supports (like interrupts)
*
- * The drivers must implement config_aneg and read_status. All
- * other functions are optional. Note that none of these
- * functions should be called from interrupt time. The goal is
- * for the bus read/write functions to be able to block when the
- * bus transaction is happening, and be freed up by an interrupt
- * (The MPC85xx has this ability, though it is not currently
- * supported in the driver).
+ * All functions are optional. If config_aneg or read_status
+ * are not implemented, the phy core uses the genphy versions.
+ * Note that none of these functions should be called from
+ * interrupt time. The goal is for the bus read/write functions
+ * to be able to block when the bus transaction is happening,
+ * and be freed up by an interrupt (The MPC85xx has this ability,
+ * though it is not currently supported in the driver).
*/
struct phy_driver {
struct mdio_driver_common mdiodrv;
u32 phy_id;
char *name;
- unsigned int phy_id_mask;
+ u32 phy_id_mask;
u32 features;
u32 flags;
const void *driver_data;
@@ -763,6 +763,20 @@ static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
};
/**
+ * phy_interface_mode_is_8023z() - does the phy interface mode use 802.3z
+ * negotiation
+ * @mode: one of &enum phy_interface_t
+ *
+ * Returns true if the phy interface mode uses the 16-bit negotiation
+ * word as defined in 802.3z. (See 802.3-2015 37.2.1 Config_Reg encoding)
+ */
+static inline bool phy_interface_mode_is_8023z(phy_interface_t mode)
+{
+ return mode == PHY_INTERFACE_MODE_1000BASEX ||
+ mode == PHY_INTERFACE_MODE_2500BASEX;
+}
+
+/**
* phy_interface_is_rgmii - Convenience function for testing if a PHY interface
* is RGMII (all variants)
* @phydev: the phy_device struct
@@ -840,13 +854,11 @@ int phy_aneg_done(struct phy_device *phydev);
int phy_stop_interrupts(struct phy_device *phydev);
int phy_restart_aneg(struct phy_device *phydev);
+int phy_reset_after_clk_enable(struct phy_device *phydev);
-static inline int phy_read_status(struct phy_device *phydev)
+static inline void phy_device_reset(struct phy_device *phydev, int value)
{
- if (!phydev->drv)
- return -EIO;
-
- return phydev->drv->read_status(phydev);
+ mdio_device_reset(&phydev->mdio, value);
}
#define phydev_err(_phydev, format, args...) \
@@ -890,6 +902,17 @@ int genphy_c45_read_pma(struct phy_device *phydev);
int genphy_c45_pma_setup_forced(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
+static inline int phy_read_status(struct phy_device *phydev)
+{
+ if (!phydev->drv)
+ return -EIO;
+
+ if (phydev->drv->read_status)
+ return phydev->drv->read_status(phydev);
+ else
+ return genphy_read_status(phydev);
+}
+
void phy_driver_unregister(struct phy_driver *drv);
void phy_drivers_unregister(struct phy_driver *drv, int n);
int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index af67edd4ae38..bd137c273d38 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -7,6 +7,7 @@
struct device_node;
struct ethtool_cmd;
+struct fwnode_handle;
struct net_device;
enum {
@@ -20,19 +21,31 @@ enum {
MLO_AN_PHY = 0, /* Conventional PHY */
MLO_AN_FIXED, /* Fixed-link mode */
- MLO_AN_SGMII, /* Cisco SGMII protocol */
- MLO_AN_8023Z, /* 1000base-X protocol */
+ MLO_AN_INBAND, /* In-band protocol */
};
static inline bool phylink_autoneg_inband(unsigned int mode)
{
- return mode == MLO_AN_SGMII || mode == MLO_AN_8023Z;
+ return mode == MLO_AN_INBAND;
}
+/**
+ * struct phylink_link_state - link state structure
+ * @advertising: ethtool bitmask containing advertised link modes
+ * @lp_advertising: ethtool bitmask containing link partner advertised link
+ * modes
+ * @interface: link &typedef phy_interface_t mode
+ * @speed: link speed, one of the SPEED_* constants.
+ * @duplex: link duplex mode, one of DUPLEX_* constants.
+ * @pause: link pause state, described by MLO_PAUSE_* constants.
+ * @link: true if the link is up.
+ * @an_enabled: true if autonegotiation is enabled/desired.
+ * @an_complete: true if autonegotiation has completed.
+ */
struct phylink_link_state {
__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
- phy_interface_t interface; /* PHY_INTERFACE_xxx */
+ phy_interface_t interface;
int speed;
int duplex;
int pause;
@@ -41,72 +54,145 @@ struct phylink_link_state {
unsigned int an_complete:1;
};
+/**
+ * struct phylink_mac_ops - MAC operations structure.
+ * @validate: Validate and update the link configuration.
+ * @mac_link_state: Read the current link state from the hardware.
+ * @mac_config: configure the MAC for the selected mode and state.
+ * @mac_an_restart: restart 802.3z BaseX autonegotiation.
+ * @mac_link_down: take the link down.
+ * @mac_link_up: allow the link to come up.
+ *
+ * The individual methods are described more fully below.
+ */
struct phylink_mac_ops {
- /**
- * validate: validate and update the link configuration
- * @ndev: net_device structure associated with MAC
- * @config: configuration to validate
- *
- * Update the %config->supported and %config->advertised masks
- * clearing bits that can not be supported.
- *
- * Note: the PHY may be able to transform from one connection
- * technology to another, so, eg, don't clear 1000BaseX just
- * because the MAC is unable to support it. This is more about
- * clearing unsupported speeds and duplex settings.
- *
- * If the %config->interface mode is %PHY_INTERFACE_MODE_1000BASEX
- * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode
- * based on %config->advertised and/or %config->speed.
- */
void (*validate)(struct net_device *ndev, unsigned long *supported,
struct phylink_link_state *state);
-
- /* Read the current link state from the hardware */
- int (*mac_link_state)(struct net_device *, struct phylink_link_state *);
-
- /* Configure the MAC */
- /**
- * mac_config: configure the MAC for the selected mode and state
- * @ndev: net_device structure for the MAC
- * @mode: one of MLO_AN_FIXED, MLO_AN_PHY, MLO_AN_8023Z, MLO_AN_SGMII
- * @state: state structure
- *
- * The action performed depends on the currently selected mode:
- *
- * %MLO_AN_FIXED, %MLO_AN_PHY:
- * set the specified speed, duplex, pause mode, and phy interface
- * mode in the provided @state.
- * %MLO_AN_8023Z:
- * place the link in 1000base-X mode, advertising the parameters
- * given in advertising in @state.
- * %MLO_AN_SGMII:
- * place the link in Cisco SGMII mode - there is no advertisment
- * to make as the PHY communicates the speed and duplex to the
- * MAC over the in-band control word. Configuration of the pause
- * mode is as per MLO_AN_PHY since this is not included.
- */
+ int (*mac_link_state)(struct net_device *ndev,
+ struct phylink_link_state *state);
void (*mac_config)(struct net_device *ndev, unsigned int mode,
const struct phylink_link_state *state);
-
- /**
- * mac_an_restart: restart 802.3z BaseX autonegotiation
- * @ndev: net_device structure for the MAC
- */
void (*mac_an_restart)(struct net_device *ndev);
-
- void (*mac_link_down)(struct net_device *, unsigned int mode);
- void (*mac_link_up)(struct net_device *, unsigned int mode,
- struct phy_device *);
+ void (*mac_link_down)(struct net_device *ndev, unsigned int mode);
+ void (*mac_link_up)(struct net_device *ndev, unsigned int mode,
+ struct phy_device *phy);
};
-struct phylink *phylink_create(struct net_device *, struct device_node *,
+#if 0 /* For kernel-doc purposes only. */
+/**
+ * validate - Validate and update the link configuration
+ * @ndev: a pointer to a &struct net_device for the MAC.
+ * @supported: ethtool bitmask for supported link modes.
+ * @state: a pointer to a &struct phylink_link_state.
+ *
+ * Clear bits in the @supported and @state->advertising masks that
+ * are not supportable by the MAC.
+ *
+ * Note that the PHY may be able to transform from one connection
+ * technology to another, so, eg, don't clear 1000BaseX just
+ * because the MAC is unable to BaseX mode. This is more about
+ * clearing unsupported speeds and duplex settings.
+ *
+ * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX
+ * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode
+ * based on @state->advertising and/or @state->speed and update
+ * @state->interface accordingly.
+ */
+void validate(struct net_device *ndev, unsigned long *supported,
+ struct phylink_link_state *state);
+
+/**
+ * mac_link_state() - Read the current link state from the hardware
+ * @ndev: a pointer to a &struct net_device for the MAC.
+ * @state: a pointer to a &struct phylink_link_state.
+ *
+ * Read the current link state from the MAC, reporting the current
+ * speed in @state->speed, duplex mode in @state->duplex, pause mode
+ * in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits,
+ * negotiation completion state in @state->an_complete, and link
+ * up state in @state->link.
+ */
+int mac_link_state(struct net_device *ndev,
+ struct phylink_link_state *state);
+
+/**
+ * mac_config() - configure the MAC for the selected mode and state
+ * @ndev: a pointer to a &struct net_device for the MAC.
+ * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
+ * @state: a pointer to a &struct phylink_link_state.
+ *
+ * The action performed depends on the currently selected mode:
+ *
+ * %MLO_AN_FIXED, %MLO_AN_PHY:
+ * Configure the specified @state->speed, @state->duplex and
+ * @state->pause (%MLO_PAUSE_TX / %MLO_PAUSE_RX) mode.
+ *
+ * %MLO_AN_INBAND:
+ * place the link in an inband negotiation mode (such as 802.3z
+ * 1000base-X or Cisco SGMII mode depending on the @state->interface
+ * mode). In both cases, link state management (whether the link
+ * is up or not) is performed by the MAC, and reported via the
+ * mac_link_state() callback. Changes in link state must be made
+ * by calling phylink_mac_change().
+ *
+ * If in 802.3z mode, the link speed is fixed, dependent on the
+ * @state->interface. Duplex is negotiated, and pause is advertised
+ * according to @state->an_enabled, @state->pause and
+ * @state->advertising flags. Beware of MACs which only support full
+ * duplex at gigabit and higher speeds.
+ *
+ * If in Cisco SGMII mode, the link speed and duplex mode are passed
+ * in the serial bitstream 16-bit configuration word, and the MAC
+ * should be configured to read these bits and acknowledge the
+ * configuration word. Nothing is advertised by the MAC. The MAC is
+ * responsible for reading the configuration word and configuring
+ * itself accordingly.
+ */
+void mac_config(struct net_device *ndev, unsigned int mode,
+ const struct phylink_link_state *state);
+
+/**
+ * mac_an_restart() - restart 802.3z BaseX autonegotiation
+ * @ndev: a pointer to a &struct net_device for the MAC.
+ */
+void mac_an_restart(struct net_device *ndev);
+
+/**
+ * mac_link_down() - take the link down
+ * @ndev: a pointer to a &struct net_device for the MAC.
+ * @mode: link autonegotiation mode
+ *
+ * If @mode is not an in-band negotiation mode (as defined by
+ * phylink_autoneg_inband()), force the link down and disable any
+ * Energy Efficient Ethernet MAC configuration.
+ */
+void mac_link_down(struct net_device *ndev, unsigned int mode);
+
+/**
+ * mac_link_up() - allow the link to come up
+ * @ndev: a pointer to a &struct net_device for the MAC.
+ * @mode: link autonegotiation mode
+ * @phy: any attached phy
+ *
+ * If @mode is not an in-band negotiation mode (as defined by
+ * phylink_autoneg_inband()), allow the link to come up. If @phy
+ * is non-%NULL, configure Energy Efficient Ethernet by calling
+ * phy_init_eee() and perform appropriate MAC configuration for EEE.
+ */
+void mac_link_up(struct net_device *ndev, unsigned int mode,
+ struct phy_device *phy);
+#endif
+
+struct phylink *phylink_create(struct net_device *, struct fwnode_handle *,
phy_interface_t iface, const struct phylink_mac_ops *ops);
void phylink_destroy(struct phylink *);
int phylink_connect_phy(struct phylink *, struct phy_device *);
-int phylink_of_phy_connect(struct phylink *, struct device_node *);
+int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags);
void phylink_disconnect_phy(struct phylink *);
+int phylink_fixed_state_cb(struct phylink *,
+ void (*cb)(struct net_device *dev,
+ struct phylink_link_state *));
void phylink_mac_change(struct phylink *, bool up);
@@ -128,7 +214,6 @@ int phylink_ethtool_set_pauseparam(struct phylink *,
int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *);
int phylink_ethtool_get_module_eeprom(struct phylink *,
struct ethtool_eeprom *, u8 *);
-int phylink_init_eee(struct phylink *, bool);
int phylink_get_eee_err(struct phylink *);
int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 361c08e35dbc..c9df2527e0cd 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -207,6 +207,7 @@ struct rhashtable_iter {
struct rhashtable_walker walker;
unsigned int slot;
unsigned int skip;
+ bool end_of_table;
};
static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
@@ -239,34 +240,42 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
}
-static inline unsigned int rht_key_hashfn(
- struct rhashtable *ht, const struct bucket_table *tbl,
- const void *key, const struct rhashtable_params params)
+static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
+ const void *key, const struct rhashtable_params params,
+ unsigned int hash_rnd)
{
unsigned int hash;
/* params must be equal to ht->p if it isn't constant. */
if (!__builtin_constant_p(params.key_len))
- hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+ hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
else if (params.key_len) {
unsigned int key_len = params.key_len;
if (params.hashfn)
- hash = params.hashfn(key, key_len, tbl->hash_rnd);
+ hash = params.hashfn(key, key_len, hash_rnd);
else if (key_len & (sizeof(u32) - 1))
- hash = jhash(key, key_len, tbl->hash_rnd);
+ hash = jhash(key, key_len, hash_rnd);
else
- hash = jhash2(key, key_len / sizeof(u32),
- tbl->hash_rnd);
+ hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
} else {
unsigned int key_len = ht->p.key_len;
if (params.hashfn)
- hash = params.hashfn(key, key_len, tbl->hash_rnd);
+ hash = params.hashfn(key, key_len, hash_rnd);
else
- hash = jhash(key, key_len, tbl->hash_rnd);
+ hash = jhash(key, key_len, hash_rnd);
}
+ return hash;
+}
+
+static inline unsigned int rht_key_hashfn(
+ struct rhashtable *ht, const struct bucket_table *tbl,
+ const void *key, const struct rhashtable_params params)
+{
+ unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
+
return rht_bucket_index(tbl, hash);
}
@@ -378,8 +387,15 @@ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
void rhashtable_walk_enter(struct rhashtable *ht,
struct rhashtable_iter *iter);
void rhashtable_walk_exit(struct rhashtable_iter *iter);
-int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
+int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
+
+static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
+{
+ (void)rhashtable_walk_start_check(iter);
+}
+
void *rhashtable_walk_next(struct rhashtable_iter *iter);
+void *rhashtable_walk_peek(struct rhashtable_iter *iter);
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
void rhashtable_free_and_destroy(struct rhashtable *ht,
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index da803dfc7a39..b36c76635f18 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -102,11 +102,15 @@ enum sctp_cid {
/* AUTH Extension Section 4.1 */
SCTP_CID_AUTH = 0x0F,
+ /* sctp ndata 5.1. I-DATA */
+ SCTP_CID_I_DATA = 0x40,
+
/* PR-SCTP Sec 3.2 */
SCTP_CID_FWD_TSN = 0xC0,
/* Use hex, as defined in ADDIP sec. 3.1 */
SCTP_CID_ASCONF = 0xC1,
+ SCTP_CID_I_FWD_TSN = 0xC2,
SCTP_CID_ASCONF_ACK = 0x80,
SCTP_CID_RECONF = 0x82,
}; /* enum */
@@ -240,6 +244,23 @@ struct sctp_data_chunk {
struct sctp_datahdr data_hdr;
};
+struct sctp_idatahdr {
+ __be32 tsn;
+ __be16 stream;
+ __be16 reserved;
+ __be32 mid;
+ union {
+ __u32 ppid;
+ __be32 fsn;
+ };
+ __u8 payload[0];
+};
+
+struct sctp_idata_chunk {
+ struct sctp_chunkhdr chunk_hdr;
+ struct sctp_idatahdr data_hdr;
+};
+
/* DATA Chuck Specific Flags */
enum {
SCTP_DATA_MIDDLE_FRAG = 0x00,
@@ -596,6 +617,22 @@ struct sctp_fwdtsn_chunk {
struct sctp_fwdtsn_hdr fwdtsn_hdr;
};
+struct sctp_ifwdtsn_skip {
+ __be16 stream;
+ __u8 reserved;
+ __u8 flags;
+ __be32 mid;
+};
+
+struct sctp_ifwdtsn_hdr {
+ __be32 new_cum_tsn;
+ struct sctp_ifwdtsn_skip skip[0];
+};
+
+struct sctp_ifwdtsn_chunk {
+ struct sctp_chunkhdr chunk_hdr;
+ struct sctp_ifwdtsn_hdr fwdtsn_hdr;
+};
/* ADDIP
* Section 3.1.1 Address Configuration Change Chunk (ASCONF)
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index 4a906f560817..0c5c5f6ae1ec 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -3,7 +3,7 @@
#include <linux/phy.h>
-struct __packed sfp_eeprom_base {
+struct sfp_eeprom_base {
u8 phys_id;
u8 phys_ext_id;
u8 connector;
@@ -166,12 +166,12 @@ struct __packed sfp_eeprom_base {
union {
__be16 optical_wavelength;
u8 cable_spec;
- };
+ } __packed;
u8 reserved62;
u8 cc_base;
-};
+} __packed;
-struct __packed sfp_eeprom_ext {
+struct sfp_eeprom_ext {
__be16 options;
u8 br_max;
u8 br_min;
@@ -181,12 +181,21 @@ struct __packed sfp_eeprom_ext {
u8 enhopts;
u8 sff8472_compliance;
u8 cc_ext;
-};
-
-struct __packed sfp_eeprom_id {
+} __packed;
+
+/**
+ * struct sfp_eeprom_id - raw SFP module identification information
+ * @base: base SFP module identification structure
+ * @ext: extended SFP module identification structure
+ *
+ * See the SFF-8472 specification and related documents for the definition
+ * of these structure members. This can be obtained from
+ * ftp://ftp.seagate.com/sff
+ */
+struct sfp_eeprom_id {
struct sfp_eeprom_base base;
struct sfp_eeprom_ext ext;
-};
+} __packed;
/* SFP EEPROM registers */
enum {
@@ -222,6 +231,7 @@ enum {
SFP_SFF8472_COMPLIANCE = 0x5e,
SFP_CC_EXT = 0x5f,
+ SFP_PHYS_ID_SFF = 0x02,
SFP_PHYS_ID_SFP = 0x03,
SFP_PHYS_EXT_ID_SFP = 0x04,
SFP_CONNECTOR_UNSPEC = 0x00,
@@ -347,19 +357,32 @@ enum {
SFP_PAGE = 0x7f,
};
-struct device_node;
+struct fwnode_handle;
struct ethtool_eeprom;
struct ethtool_modinfo;
struct net_device;
struct sfp_bus;
+/**
+ * struct sfp_upstream_ops - upstream operations structure
+ * @module_insert: called after a module has been detected to determine
+ * whether the module is supported for the upstream device.
+ * @module_remove: called after the module has been removed.
+ * @link_down: called when the link is non-operational for whatever
+ * reason.
+ * @link_up: called when the link is operational.
+ * @connect_phy: called when an I2C accessible PHY has been detected
+ * on the module.
+ * @disconnect_phy: called when a module with an I2C accessible PHY has
+ * been removed.
+ */
struct sfp_upstream_ops {
- int (*module_insert)(void *, const struct sfp_eeprom_id *id);
- void (*module_remove)(void *);
- void (*link_down)(void *);
- void (*link_up)(void *);
- int (*connect_phy)(void *, struct phy_device *);
- void (*disconnect_phy)(void *);
+ int (*module_insert)(void *priv, const struct sfp_eeprom_id *id);
+ void (*module_remove)(void *priv);
+ void (*link_down)(void *priv);
+ void (*link_up)(void *priv);
+ int (*connect_phy)(void *priv, struct phy_device *);
+ void (*disconnect_phy)(void *priv);
};
#if IS_ENABLED(CONFIG_SFP)
@@ -375,7 +398,7 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
u8 *data);
void sfp_upstream_start(struct sfp_bus *bus);
void sfp_upstream_stop(struct sfp_bus *bus);
-struct sfp_bus *sfp_register_upstream(struct device_node *np,
+struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode,
struct net_device *ndev, void *upstream,
const struct sfp_upstream_ops *ops);
void sfp_unregister_upstream(struct sfp_bus *bus);
@@ -419,7 +442,8 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus)
{
}
-static inline struct sfp_bus *sfp_register_upstream(struct device_node *np,
+static inline struct sfp_bus *sfp_register_upstream(
+ struct fwnode_handle *fwnode,
struct net_device *ndev, void *upstream,
const struct sfp_upstream_ops *ops)
{
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index 8621ffdeecbf..c7addf37d119 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -72,6 +72,11 @@ static inline bool __skb_array_empty(struct skb_array *a)
return !__ptr_ring_peek(&a->ring);
}
+static inline struct sk_buff *__skb_array_peek(struct skb_array *a)
+{
+ return __ptr_ring_peek(&a->ring);
+}
+
static inline bool skb_array_empty(struct skb_array *a)
{
return ptr_ring_empty(&a->ring);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a38c80e9f91e..b8e0da6c27d6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1211,6 +1211,11 @@ static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
data, proto, nhoff, hlen, flags);
}
+void
+skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container);
+
static inline __u32 skb_get_hash(struct sk_buff *skb)
{
if (!skb->l4_hash && !skb->sw_hash)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3bf273538840..4894d322d258 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -409,4 +409,10 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
#define atomic_dec_and_lock(atomic, lock) \
__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
+int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
+ size_t max_size, unsigned int cpu_mult,
+ gfp_t gfp);
+
+void free_bucket_spinlocks(spinlock_t *locks);
+
#endif /* __LINUX_SPINLOCK_H */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ca4a6361389b..4f93f0953c41 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -344,7 +344,7 @@ struct tcp_sock {
/* Receiver queue space */
struct {
- int space;
+ u32 space;
u32 seq;
u64 time;
} rcvq_space;
diff --git a/include/net/act_api.h b/include/net/act_api.h
index fd08df74c466..6ed9692f20bd 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -86,7 +86,7 @@ struct tc_action_ops {
int (*act)(struct sk_buff *, const struct tc_action *,
struct tcf_result *);
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
- void (*cleanup)(struct tc_action *, int bind);
+ void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *, struct tc_action **, u32);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
@@ -120,12 +120,19 @@ int tc_action_net_init(struct tc_action_net *tn,
void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
struct tcf_idrinfo *idrinfo);
-static inline void tc_action_net_exit(struct tc_action_net *tn)
+static inline void tc_action_net_exit(struct list_head *net_list,
+ unsigned int id)
{
+ struct net *net;
+
rtnl_lock();
- tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ list_for_each_entry(net, net_list, exit_list) {
+ struct tc_action_net *tn = net_generic(net, id);
+
+ tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ kfree(tn->idrinfo);
+ }
rtnl_unlock();
- kfree(tn->idrinfo);
}
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index b623b65a79d1..c4185a7b0e90 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -180,7 +180,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout)
*/
int ipv6_addr_label_init(void);
void ipv6_addr_label_cleanup(void);
-void ipv6_addr_label_rtnl_register(void);
+int ipv6_addr_label_rtnl_register(void);
u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr,
int type, int ifindex);
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 55df9939bca2..342d2503cba5 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
*/
struct dn_route {
struct dst_entry dst;
+ struct dn_route __rcu *dn_next;
struct neighbour *n;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2a05738570d8..6cb602dd970c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,31 +296,39 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds)
return mask;
}
-static inline u8 dsa_upstream_port(struct dsa_switch *ds)
+/* Return the local port used to reach an arbitrary switch port */
+static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device,
+ int port)
{
- struct dsa_switch_tree *dst = ds->dst;
-
- /*
- * If this is the root switch (i.e. the switch that connects
- * to the CPU), return the cpu port number on this switch.
- * Else return the (DSA) port number that connects to the
- * switch that is one hop closer to the cpu.
- */
- if (dst->cpu_dp->ds == ds)
- return dst->cpu_dp->index;
+ if (device == ds->index)
+ return port;
else
- return ds->rtable[dst->cpu_dp->ds->index];
+ return ds->rtable[device];
+}
+
+/* Return the local port used to reach the dedicated CPU port */
+static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port)
+{
+ const struct dsa_port *dp = dsa_to_port(ds, port);
+ const struct dsa_port *cpu_dp = dp->cpu_dp;
+
+ if (!cpu_dp)
+ return port;
+
+ return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
}
typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
/*
* Legacy probing.
*/
const char *(*probe)(struct device *dsa_dev,
struct device *host_dev, int sw_addr,
void **priv);
+#endif
enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds,
int port);
@@ -412,12 +420,10 @@ struct dsa_switch_ops {
*/
int (*port_vlan_filtering)(struct dsa_switch *ds, int port,
bool vlan_filtering);
- int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
- void (*port_vlan_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
+ int (*port_vlan_prepare)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+ void (*port_vlan_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
int (*port_vlan_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
/*
@@ -433,12 +439,10 @@ struct dsa_switch_ops {
/*
* Multicast database
*/
- int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
- void (*port_mdb_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
+ int (*port_mdb_prepare)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
+ void (*port_mdb_add)(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
int (*port_mdb_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb);
/*
@@ -472,11 +476,20 @@ struct dsa_switch_driver {
const struct dsa_switch_ops *ops;
};
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
/* Legacy driver registration */
void register_switch_driver(struct dsa_switch_driver *type);
void unregister_switch_driver(struct dsa_switch_driver *type);
struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
+#else
+static inline void register_switch_driver(struct dsa_switch_driver *type) { }
+static inline void unregister_switch_driver(struct dsa_switch_driver *type) { }
+static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
+{
+ return NULL;
+}
+#endif
struct net_device *dsa_dev_to_net_device(struct device *dev);
/* Keep inline for faster access in hot path */
diff --git a/include/net/dst.h b/include/net/dst.h
index b091fd536098..33d2a5433924 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -34,13 +34,9 @@ struct sk_buff;
struct dst_entry {
struct net_device *dev;
- struct rcu_head rcu_head;
- struct dst_entry *child;
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
- struct dst_entry *path;
- struct dst_entry *from;
#ifdef CONFIG_XFRM
struct xfrm_state *xfrm;
#else
@@ -59,8 +55,6 @@ struct dst_entry {
#define DST_XFRM_QUEUE 0x0040
#define DST_METADATA 0x0080
- short error;
-
/* A non-zero value of dst->obsolete forces by-hand validation
* of the route entry. Positive values are set by the generic
* dst layer to indicate that the entry has been forcefully
@@ -76,35 +70,24 @@ struct dst_entry {
#define DST_OBSOLETE_KILL -2
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
- unsigned short __pad3;
-#ifdef CONFIG_IP_ROUTE_CLASSID
- __u32 tclassid;
-#else
- __u32 __pad2;
-#endif
-
-#ifdef CONFIG_64BIT
- /*
- * Align __refcnt to a 64 bytes alignment
- * (L1_CACHE_SIZE would be too much)
- */
- long __pad_to_align_refcnt[2];
-#endif
/*
* __refcnt wants to be on a different cache line from
* input/output/ops or performance tanks badly
*/
- atomic_t __refcnt; /* client references */
+#ifdef CONFIG_64BIT
+ atomic_t __refcnt; /* 64-bit offset 64 */
+#endif
int __use;
unsigned long lastuse;
struct lwtunnel_state *lwtstate;
- union {
- struct dst_entry *next;
- struct rtable __rcu *rt_next;
- struct rt6_info __rcu *rt6_next;
- struct dn_route __rcu *dn_next;
- };
+ struct rcu_head rcu_head;
+ short error;
+ short __pad;
+ __u32 tclassid;
+#ifndef CONFIG_64BIT
+ atomic_t __refcnt; /* 32-bit offset 64 */
+#endif
};
struct dst_metrics {
@@ -250,7 +233,7 @@ static inline void dst_hold(struct dst_entry *dst)
{
/*
* If your kernel compilation stops here, please check
- * __pad_to_align_refcnt declaration in struct dst_entry
+ * the placement of __refcnt in struct dst_entry
*/
BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
diff --git a/include/net/erspan.h b/include/net/erspan.h
index ca94fc86865e..acdf6843095d 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -15,7 +15,7 @@
* s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
* other fields are set to zero, so only a sequence number follows.
*
- * ERSPAN Type II header (8 octets [42:49])
+ * ERSPAN Version 1 (Type II) header (8 octets [42:49])
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -24,11 +24,29 @@
* | Reserved | Index |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
+ *
+ * ERSPAN Version 2 (Type III) header (12 octets [42:49])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ver | VLAN | COS |BSO|T| Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Timestamp |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SGT |P| FT | Hw ID |D|Gra|O|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Platform Specific SubHeader (8 octets, optional)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Platf ID | Platform Specific Info |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Platform Specific Info |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
* GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
*/
-#define ERSPAN_VERSION 0x1
-
+#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
#define VER_MASK 0xf000
#define VLAN_MASK 0x0fff
#define COS_MASK 0xe000
@@ -37,6 +55,28 @@
#define ID_MASK 0x03ff
#define INDEX_MASK 0xfffff
+#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/
+#define BSO_MASK EN_MASK
+#define SGT_MASK 0xffff0000
+#define P_MASK 0x8000
+#define FT_MASK 0x7c00
+#define HWID_MASK 0x03f0
+#define DIR_MASK 0x0008
+#define GRA_MASK 0x0006
+#define O_MASK 0x0001
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+ __be32 timestamp;
+ __be16 sgt; /* security group tag */
+ __be16 flags;
+#define P_OFFSET 15
+#define FT_OFFSET 10
+#define HWID_OFFSET 4
+#define DIR_OFFSET 3
+#define GRA_OFFSET 1
+};
+
enum erspan_encap_type {
ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
@@ -44,18 +84,159 @@ enum erspan_encap_type {
ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */
};
+#define ERSPAN_V1_MDSIZE 4
+#define ERSPAN_V2_MDSIZE 8
struct erspan_metadata {
- __be32 index; /* type II */
+ union {
+ __be32 index; /* Version 1 (type II)*/
+ struct erspan_md2 md2; /* Version 2 (type III) */
+ } u;
+ int version;
};
-struct erspanhdr {
+struct erspan_base_hdr {
__be16 ver_vlan;
#define VER_OFFSET 12
__be16 session_id;
#define COS_OFFSET 13
#define EN_OFFSET 11
+#define BSO_OFFSET EN_OFFSET
#define T_OFFSET 10
- struct erspan_metadata md;
};
+static inline int erspan_hdr_len(int version)
+{
+ return sizeof(struct erspan_base_hdr) +
+ (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
+}
+
+static inline u8 tos_to_cos(u8 tos)
+{
+ u8 dscp, cos;
+
+ dscp = tos >> 2;
+ cos = dscp >> 3;
+ return cos;
+}
+
+static inline void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index,
+ bool truncate, bool is_ipv4)
+{
+ struct ethhdr *eth = eth_hdr(skb);
+ enum erspan_encap_type enc_type;
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *ersmd;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+ u8 tos;
+
+ tos = is_ipv4 ? ip_hdr(skb)->tos :
+ (ipv6_hdr(skb)->priority << 4) +
+ (ipv6_hdr(skb)->flow_lbl[0] >> 4);
+
+ enc_type = ERSPAN_ENCAP_NOVLAN;
+
+ /* If mirrored packet has vlan tag, extract tci and
+ * perserve vlan header in the mirrored frame.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ enc_type = ERSPAN_ENCAP_INFRAME;
+ }
+
+ skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
+
+ /* Build base header */
+ ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+ (ERSPAN_VERSION << VER_OFFSET));
+ ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+ ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) |
+ (enc_type << EN_OFFSET & EN_MASK) |
+ ((truncate << T_OFFSET) & T_MASK));
+
+ /* Build metadata */
+ ersmd = (struct erspan_metadata *)(ershdr + 1);
+ ersmd->u.index = htonl(index & INDEX_MASK);
+}
+
+/* ERSPAN GRA: timestamp granularity
+ * 00b --> granularity = 100 microseconds
+ * 01b --> granularity = 100 nanoseconds
+ * 10b --> granularity = IEEE 1588
+ * Here we only support 100 microseconds.
+ */
+static inline __be32 erspan_get_timestamp(void)
+{
+ u64 h_usecs;
+ ktime_t kt;
+
+ kt = ktime_get_real();
+ h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC);
+
+ /* ERSPAN base header only has 32-bit,
+ * so it wraps around 4 days.
+ */
+ return htonl((u32)h_usecs);
+}
+
+static inline void erspan_build_header_v2(struct sk_buff *skb,
+ __be32 id, u8 direction, u16 hwid,
+ bool truncate, bool is_ipv4)
+{
+ struct ethhdr *eth = eth_hdr(skb);
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *md;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+ u16 session_id;
+ u8 gra = 0; /* 100 usec */
+ u8 bso = 0; /* Bad/Short/Oversized */
+ u8 sgt = 0;
+ u8 tos;
+
+ tos = is_ipv4 ? ip_hdr(skb)->tos :
+ (ipv6_hdr(skb)->priority << 4) +
+ (ipv6_hdr(skb)->flow_lbl[0] >> 4);
+
+ /* Unlike v1, v2 does not have En field,
+ * so only extract vlan tci field.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ }
+
+ skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
+
+ /* Build base header */
+ ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+ (ERSPAN_VERSION2 << VER_OFFSET));
+ session_id = (u16)(ntohl(id) & ID_MASK) |
+ ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) |
+ (bso << BSO_OFFSET & BSO_MASK) |
+ ((truncate << T_OFFSET) & T_MASK);
+ ershdr->session_id = htons(session_id);
+
+ /* Build metadata */
+ md = (struct erspan_metadata *)(ershdr + 1);
+ md->u.md2.timestamp = erspan_get_timestamp();
+ md->u.md2.sgt = htons(sgt);
+ md->u.md2.flags = htons(((1 << P_OFFSET) & P_MASK) |
+ ((hwid << HWID_OFFSET) & HWID_MASK) |
+ ((direction << DIR_OFFSET) & DIR_MASK) |
+ ((gra << GRA_OFFSET) & GRA_MASK));
+}
+
#endif
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 304f7aa9cc01..0304ba2ae353 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -49,6 +49,9 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d,
int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue __percpu *cpu_q,
struct gnet_stats_queue *q, __u32 qlen);
+void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu_q,
+ const struct gnet_stats_queue *q, __u32 qlen);
int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 0358745ea059..8e1bf9ae4a5e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
+ * @icsk_listen_portaddr_node hash to the portaddr listener hashtable
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -101,6 +102,7 @@ struct inet_connection_sock {
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void *icsk_ulp_data;
+ struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:6,
icsk_ca_setsockopt:1,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 2dbbbff5e1e3..9141e95529e7 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -111,6 +111,7 @@ struct inet_bind_hashbucket {
*/
struct inet_listen_hashbucket {
spinlock_t lock;
+ unsigned int count;
struct hlist_head head;
};
@@ -132,12 +133,13 @@ struct inet_hashinfo {
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
*/
+ struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
-
unsigned int bhash_size;
- /* 4 bytes hole on 64 bit */
- struct kmem_cache *bind_bucket_cachep;
+ /* The 2nd listener table hashed by local port and address */
+ unsigned int lhash2_mask;
+ struct inet_listen_hashbucket *lhash2;
/* All the above members are written once at bootup and
* never written again _or_ are predominantly read-access.
@@ -145,14 +147,25 @@ struct inet_hashinfo {
* Now align to a new cache line as all the following members
* might be often dirty.
*/
- /* All sockets in TCP_LISTEN state will be in here. This is the only
- * table where wildcard'd TCP sockets can exist. Hash function here
- * is just local port number.
+ /* All sockets in TCP_LISTEN state will be in listening_hash.
+ * This is the only table where wildcard'd TCP sockets can
+ * exist. listening_hash is only hashed by local port number.
+ * If lhash2 is initialized, the same socket will also be hashed
+ * to lhash2 by port and address.
*/
struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
____cacheline_aligned_in_smp;
};
+#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
+ hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
+
+static inline struct inet_listen_hashbucket *
+inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
+{
+ return &h->lhash2[hash & h->lhash2_mask];
+}
+
static inline struct inet_ehash_bucket *inet_ehash_bucket(
struct inet_hashinfo *hashinfo,
unsigned int hash)
@@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child);
void inet_put_port(struct sock *sk);
void inet_hashinfo_init(struct inet_hashinfo *h);
+void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+ unsigned long numentries, int scale,
+ unsigned long low_limit,
+ unsigned long high_limit);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 1356fa6a7566..899495589a7e 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -93,8 +93,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
struct inet_timewait_death_row *dr,
const int state);
-void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
- struct inet_hashinfo *hashinfo);
+void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+ struct inet_hashinfo *hashinfo);
void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
bool rearm);
diff --git a/include/net/ip.h b/include/net/ip.h
index af8addbaa3c1..746abff9ce51 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -26,12 +26,14 @@
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/skbuff.h>
+#include <linux/jhash.h>
#include <net/inet_sock.h>
#include <net/route.h>
#include <net/snmp.h>
#include <net/flow.h>
#include <net/flow_dissector.h>
+#include <net/netns/hash.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */
@@ -522,6 +524,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip)
return (__force unsigned int) ip;
}
+static inline u32 ipv4_portaddr_hash(const struct net *net,
+ __be32 saddr,
+ unsigned int port)
+{
+ return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
+}
+
bool ip_call_ra_chain(struct sk_buff *skb);
/*
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 10c913816032..44d96a91e745 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -129,6 +129,8 @@ struct rt6_exception {
struct rt6_info {
struct dst_entry dst;
+ struct rt6_info __rcu *rt6_next;
+ struct rt6_info *from;
/*
* Tail elements of dst_entry (__refcnt etc.)
@@ -176,11 +178,11 @@ struct rt6_info {
#define for_each_fib6_node_rt_rcu(fn) \
for (rt = rcu_dereference((fn)->leaf); rt; \
- rt = rcu_dereference(rt->dst.rt6_next))
+ rt = rcu_dereference(rt->rt6_next))
#define for_each_fib6_walker_rt(w) \
for (rt = (w)->leaf; rt; \
- rt = rcu_dereference_protected(rt->dst.rt6_next, 1))
+ rt = rcu_dereference_protected(rt->rt6_next, 1))
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
@@ -203,11 +205,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
{
struct rt6_info *rt;
- for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES);
- rt = (struct rt6_info *)rt->dst.from);
+ for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
if (rt && rt != rt0)
rt0->dst.expires = rt->dst.expires;
-
dst_set_expires(&rt0->dst, timeout);
rt0->rt6i_flags |= RTF_EXPIRES;
}
@@ -242,8 +242,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
u32 cookie = 0;
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
- rt = (struct rt6_info *)(rt->dst.from);
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
+ rt = rt->from;
rt6_get_cookie_safe(rt, &cookie);
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index d66f70f63734..236e40ba06bf 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -36,6 +36,10 @@ struct __ip6_tnl_parm {
__be32 o_key;
__u32 fwmark;
+ __u32 index; /* ERSPAN type II index */
+ __u8 erspan_ver; /* ERSPAN version */
+ __u8 dir; /* direction */
+ __u16 hwid; /* hwid */
};
/* IPv6 tunnel */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 24628f6b09bf..1f16773cfd76 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -116,8 +116,11 @@ struct ip_tunnel {
u32 o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
- /* This field used only by ERSPAN */
+ /* These four fields used only by ERSPAN */
u32 index; /* ERSPAN type II index */
+ u8 erspan_ver; /* ERSPAN version */
+ u8 dir; /* ERSPAN direction */
+ u16 hwid; /* ERSPAN hardware ID */
struct dst_cache dst_cache;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f73797e2fa60..25be4715578c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -22,6 +22,7 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
#include <net/snmp.h>
+#include <net/netns/hash.h>
#define SIN6_LEN_RFC2133 24
@@ -673,6 +674,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
cpu_to_be32(0x0000ffff))) == 0UL;
}
+static inline u32 ipv6_portaddr_hash(const struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
+{
+ unsigned int hash, mix = net_hash_mix(net);
+
+ if (ipv6_addr_any(addr6))
+ hash = jhash_1word(0, mix);
+ else if (ipv6_addr_v4mapped(addr6))
+ hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
+ else
+ hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
+
+ return hash ^ port;
+}
+
/*
* Check for a RFC 4843 ORCHID address
* (Overlay Routable Cryptographic Hash Identifiers)
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index ebc813277662..0db7fb3e4e15 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -122,9 +122,12 @@ struct netns_sctp {
/* Flag to indicate if PR-CONFIG is enabled. */
int reconf_enable;
- /* Flag to idicate if SCTP-AUTH is enabled */
+ /* Flag to indicate if SCTP-AUTH is enabled */
int auth_enable;
+ /* Flag to indicate if stream interleave is enabled */
+ int intl_enable;
+
/*
* Policy to control SCTP IPv4 address scoping
* 0 - Disable IPv4 address scoping
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d1f413f06c72..240469228851 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -105,16 +105,18 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
void qdisc_put_rtab(struct qdisc_rate_table *tab);
void qdisc_put_stab(struct qdisc_size_table *tab);
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
-int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
- struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock, bool validate);
+bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+ struct net_device *dev, struct netdev_queue *txq,
+ spinlock_t *root_lock, bool validate);
void __qdisc_run(struct Qdisc *q);
static inline void qdisc_run(struct Qdisc *q)
{
- if (qdisc_run_begin(q))
+ if (qdisc_run_begin(q)) {
__qdisc_run(q);
+ qdisc_run_end(q);
+ }
}
static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index ead018744ff5..14b6b3af8918 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,10 +13,10 @@ enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = 1,
};
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
+int rtnl_register_module(struct module *owner, int protocol, int msgtype,
+ rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
int rtnl_unregister(int protocol, int msgtype);
void rtnl_unregister_all(int protocol);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 83a3e47d5845..bc6b25faba99 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -71,6 +71,7 @@ struct Qdisc {
* qdisc_tree_decrease_qlen() should stop.
*/
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
+#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
u32 limit;
const struct Qdisc_ops *ops;
@@ -88,14 +89,14 @@ struct Qdisc {
/*
* For performance sake on SMP, we put highly modified fields at the end
*/
- struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
struct gnet_stats_basic_packed bstats;
seqcount_t running;
struct gnet_stats_queue qstats;
unsigned long state;
struct Qdisc *next_sched;
- struct sk_buff *skb_bad_txq;
+ struct sk_buff_head skb_bad_txq;
int padded;
refcount_t refcnt;
@@ -162,7 +163,8 @@ struct Qdisc_class_ops {
void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
/* Filter manipulation */
- struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long);
+ struct tcf_block * (*tcf_block)(struct Qdisc *sch,
+ unsigned long arg);
unsigned long (*bind_tcf)(struct Qdisc *, unsigned long,
u32 classid);
void (*unbind_tcf)(struct Qdisc *, unsigned long);
@@ -179,6 +181,7 @@ struct Qdisc_ops {
const struct Qdisc_class_ops *cl_ops;
char id[IFNAMSIZ];
int priv_size;
+ unsigned int static_flags;
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
@@ -186,11 +189,12 @@ struct Qdisc_ops {
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);
- int (*init)(struct Qdisc *, struct nlattr *arg);
+ int (*init)(struct Qdisc *sch, struct nlattr *arg);
void (*reset)(struct Qdisc *);
void (*destroy)(struct Qdisc *);
- int (*change)(struct Qdisc *, struct nlattr *arg);
- void (*attach)(struct Qdisc *);
+ int (*change)(struct Qdisc *sch,
+ struct nlattr *arg);
+ void (*attach)(struct Qdisc *sch);
int (*dump)(struct Qdisc *, struct sk_buff *);
int (*dump_stats)(struct Qdisc *, struct gnet_dump *);
@@ -279,7 +283,6 @@ struct tcf_block {
struct net *net;
struct Qdisc *q;
struct list_head cb_list;
- struct work_struct work;
};
static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
@@ -290,11 +293,31 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+ return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
}
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
+{
+ __u32 qlen = 0;
+ int i;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ for_each_possible_cpu(i)
+ qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+ } else {
+ qlen = q->q.qlen;
+ }
+
+ return qlen;
+}
+
static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
return (struct qdisc_skb_cb *)skb->cb;
@@ -631,12 +654,39 @@ static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
sch->qstats.backlog -= qdisc_pkt_len(skb);
}
+static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
+}
+
static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
const struct sk_buff *skb)
{
sch->qstats.backlog += qdisc_pkt_len(skb);
}
+static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
+ const struct sk_buff *skb)
+{
+ this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
+}
+
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
+{
+ this_cpu_inc(sch->cpu_qstats->qlen);
+}
+
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
+{
+ this_cpu_dec(sch->cpu_qstats->qlen);
+}
+
+static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
+{
+ this_cpu_inc(sch->cpu_qstats->requeues);
+}
+
static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
{
sch->qstats.drops += count;
@@ -767,26 +817,30 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
/* generic pseudo peek method for non-work-conserving qdisc */
static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
{
+ struct sk_buff *skb = skb_peek(&sch->gso_skb);
+
/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
- if (!sch->gso_skb) {
- sch->gso_skb = sch->dequeue(sch);
- if (sch->gso_skb) {
+ if (!skb) {
+ skb = sch->dequeue(sch);
+
+ if (skb) {
+ __skb_queue_head(&sch->gso_skb, skb);
/* it's still part of the queue */
- qdisc_qstats_backlog_inc(sch, sch->gso_skb);
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
}
}
- return sch->gso_skb;
+ return skb;
}
/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
- struct sk_buff *skb = sch->gso_skb;
+ struct sk_buff *skb = skb_peek(&sch->gso_skb);
if (skb) {
- sch->gso_skb = NULL;
+ skb = __skb_dequeue(&sch->gso_skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
} else {
@@ -844,6 +898,14 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
qdisc_qstats_drop(sch);
}
+static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ __qdisc_drop(skb, to_free);
+ qdisc_qstats_cpu_drop(sch);
+
+ return NET_XMIT_DROP;
+}
static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index deaafa9b09cb..20ff237c5eb2 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -145,12 +145,13 @@ SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other)
SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive)
-#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA)
+#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \
+ a->chunk_hdr->type == SCTP_CID_I_DATA)
/* Calculate the actual data size in a data chunk */
-#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
- - (unsigned long)(c->chunk_hdr)\
- - sizeof(struct sctp_data_chunk)))
+#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \
+ (unsigned long)(c->chunk_hdr) - \
+ sctp_datachk_len(&c->asoc->stream)))
/* Internal error codes */
enum sctp_ierror {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 906a9c0efa71..20c0c1be2ca7 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
-int sctp_transport_walk_start(struct rhashtable_iter *iter);
+void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
struct rhashtable_iter *iter);
@@ -444,13 +444,13 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
int frag = pmtu;
frag -= sp->pf->af->net_header_len;
- frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
+ frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
if (asoc->user_frag)
frag = min_t(int, frag, asoc->user_frag);
frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
- sizeof(struct sctp_data_chunk)));
+ sctp_datachk_len(&asoc->stream)));
return frag;
}
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 70fb397f65b0..2883c43c5258 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -197,10 +197,14 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
const __u32 lowest_tsn,
const struct sctp_chunk *chunk);
-struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc,
+ __u8 flags, int paylen, gfp_t gfp);
+struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc,
+ __u32 new_cum_tsn, size_t nstreams,
+ struct sctp_ifwdtsn_skip *skiplist);
+struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
const struct sctp_sndrcvinfo *sinfo,
- int len, const __u8 flags,
- __u16 ssn, gfp_t gfp);
+ int len, __u8 flags, gfp_t gfp);
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
const __u32 lowest_tsn);
struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
@@ -342,7 +346,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
__u16 size;
size = ntohs(chunk->chunk_hdr->length);
- size -= sizeof(struct sctp_data_chunk);
+ size -= sctp_datahdr_len(&chunk->asoc->stream);
return size;
}
@@ -358,6 +362,12 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
typecheck(__u32, b) && \
((__s32)((a) - (b)) <= 0))
+/* Compare two MIDs */
+#define MID_lt(a, b) \
+ (typecheck(__u32, a) && \
+ typecheck(__u32, b) && \
+ ((__s32)((a) - (b)) < 0))
+
/* Compare two SSNs */
#define SSN_lt(a,b) \
(typecheck(__u16, a) && \
diff --git a/include/net/sctp/stream_interleave.h b/include/net/sctp/stream_interleave.h
new file mode 100644
index 000000000000..6657711c8bc4
--- /dev/null
+++ b/include/net/sctp/stream_interleave.h
@@ -0,0 +1,61 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * These are definitions used by the stream schedulers, defined in RFC
+ * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11)
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresses:
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#ifndef __sctp_stream_interleave_h__
+#define __sctp_stream_interleave_h__
+
+struct sctp_stream_interleave {
+ __u16 data_chunk_len;
+ __u16 ftsn_chunk_len;
+ /* (I-)DATA process */
+ struct sctp_chunk *(*make_datafrag)(const struct sctp_association *asoc,
+ const struct sctp_sndrcvinfo *sinfo,
+ int len, __u8 flags, gfp_t gfp);
+ void (*assign_number)(struct sctp_chunk *chunk);
+ bool (*validate_data)(struct sctp_chunk *chunk);
+ int (*ulpevent_data)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp);
+ int (*enqueue_event)(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event);
+ void (*renege_events)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp);
+ void (*start_pd)(struct sctp_ulpq *ulpq, gfp_t gfp);
+ void (*abort_pd)(struct sctp_ulpq *ulpq, gfp_t gfp);
+ /* (I-)FORWARD-TSN process */
+ void (*generate_ftsn)(struct sctp_outq *q, __u32 ctsn);
+ bool (*validate_ftsn)(struct sctp_chunk *chunk);
+ void (*report_ftsn)(struct sctp_ulpq *ulpq, __u32 ftsn);
+ void (*handle_ftsn)(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk);
+};
+
+void sctp_stream_interleave_init(struct sctp_stream *stream);
+
+#endif /* __sctp_stream_interleave_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2f8f93da5dc2..8ac4d5cdbfed 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -89,6 +89,7 @@ struct sctp_stream;
#include <net/sctp/tsnmap.h>
#include <net/sctp/ulpevent.h>
#include <net/sctp/ulpqueue.h>
+#include <net/sctp/stream_interleave.h>
/* Structures useful for managing bind/connect. */
@@ -217,6 +218,7 @@ struct sctp_sock {
disable_fragments:1,
v4mapped:1,
frag_interleave:1,
+ strm_interleave:1,
recvrcvinfo:1,
recvnxtinfo:1,
data_ready_signalled:1;
@@ -397,6 +399,28 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
#define sctp_ssn_skip(stream, type, sid, ssn) \
((stream)->type[sid].ssn = ssn + 1)
+/* What is the current MID number for this stream? */
+#define sctp_mid_peek(stream, type, sid) \
+ ((stream)->type[sid].mid)
+
+/* Return the next MID number for this stream. */
+#define sctp_mid_next(stream, type, sid) \
+ ((stream)->type[sid].mid++)
+
+/* Skip over this mid and all below. */
+#define sctp_mid_skip(stream, type, sid, mid) \
+ ((stream)->type[sid].mid = mid + 1)
+
+#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid])
+
+/* What is the current MID_uo number for this stream? */
+#define sctp_mid_uo_peek(stream, type, sid) \
+ ((stream)->type[sid].mid_uo)
+
+/* Return the next MID_uo number for this stream. */
+#define sctp_mid_uo_next(stream, type, sid) \
+ ((stream)->type[sid].mid_uo++)
+
/*
* Pointers to address related SCTP functions.
* (i.e. things that depend on the address family.)
@@ -574,6 +598,8 @@ struct sctp_chunk {
struct sctp_addiphdr *addip_hdr;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_authhdr *auth_hdr;
+ struct sctp_idatahdr *idata_hdr;
+ struct sctp_ifwdtsn_hdr *ifwdtsn_hdr;
} subh;
__u8 *chunk_end;
@@ -620,6 +646,7 @@ struct sctp_chunk {
__u16 rtt_in_progress:1, /* This chunk used for RTT calc? */
has_tsn:1, /* Does this chunk have a TSN yet? */
has_ssn:1, /* Does this chunk have a SSN yet? */
+#define has_mid has_ssn
singleton:1, /* Only chunk in the packet? */
end_of_packet:1, /* Last chunk in the packet? */
ecn_ce_done:1, /* Have we processed the ECN CE bit? */
@@ -1073,6 +1100,7 @@ void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
void sctp_prsctp_prune(struct sctp_association *asoc,
struct sctp_sndrcvinfo *sinfo, int msg_len);
+void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
/* Uncork and flush an outqueue. */
static inline void sctp_outq_cork(struct sctp_outq *q)
{
@@ -1357,13 +1385,25 @@ struct sctp_stream_out_ext {
};
struct sctp_stream_out {
- __u16 ssn;
- __u8 state;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ __u32 mid_uo;
struct sctp_stream_out_ext *ext;
+ __u8 state;
};
struct sctp_stream_in {
- __u16 ssn;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ __u32 mid_uo;
+ __u32 fsn;
+ __u32 fsn_uo;
+ char pd_mode;
+ char pd_mode_uo;
};
struct sctp_stream {
@@ -1387,11 +1427,32 @@ struct sctp_stream {
struct sctp_stream_out_ext *rr_next;
};
};
+ struct sctp_stream_interleave *si;
};
#define SCTP_STREAM_CLOSED 0x00
#define SCTP_STREAM_OPEN 0x01
+static inline __u16 sctp_datachk_len(const struct sctp_stream *stream)
+{
+ return stream->si->data_chunk_len;
+}
+
+static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream)
+{
+ return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr);
+}
+
+static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream)
+{
+ return stream->si->ftsn_chunk_len;
+}
+
+static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream)
+{
+ return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr);
+}
+
/* SCTP_GET_ASSOC_STATS counters */
struct sctp_priv_assoc_stats {
/* Maximum observed rto in the association during subsequent
@@ -1940,6 +2001,7 @@ struct sctp_association {
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
temp:1, /* Is it a temporary association? */
force_delay:1,
+ intl_enable:1,
prsctp_enable:1,
reconf_enable:1;
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 231dc42f1da6..51b4e0626c34 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -45,19 +45,29 @@
/* A structure to carry information to the ULP (e.g. Sockets API) */
/* Warning: This sits inside an skb.cb[] area. Be very careful of
* growing this structure as it is at the maximum limit now.
+ *
+ * sctp_ulpevent is saved in sk->cb(48 bytes), whose last 4 bytes
+ * have been taken by sock_skb_cb, So here it has to use 'packed'
+ * to make sctp_ulpevent fit into the rest 44 bytes.
*/
struct sctp_ulpevent {
struct sctp_association *asoc;
struct sctp_chunk *chunk;
unsigned int rmem_len;
- __u32 ppid;
+ union {
+ __u32 mid;
+ __u16 ssn;
+ };
+ union {
+ __u32 ppid;
+ __u32 fsn;
+ };
__u32 tsn;
__u32 cumtsn;
__u16 stream;
- __u16 ssn;
__u16 flags;
__u16 msg_flags;
-};
+} __packed;
/* Retrieve the skb this event sits inside of. */
static inline struct sk_buff *sctp_event2skb(const struct sctp_ulpevent *ev)
@@ -112,7 +122,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
const struct sctp_association *asoc,
- __u32 indication, gfp_t gfp);
+ __u32 indication, __u32 sid, __u32 seq,
+ __u32 flags, gfp_t gfp);
struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication(
const struct sctp_association *asoc, gfp_t gfp);
@@ -140,6 +151,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
const struct sctp_association *asoc, __u16 flags,
__u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp);
+struct sctp_ulpevent *sctp_make_reassembled_event(
+ struct net *net, struct sk_buff_head *queue,
+ struct sk_buff *f_frag, struct sk_buff *l_frag);
+
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *);
void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index e0dce07b8794..bb0ecba3db2b 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -45,6 +45,7 @@ struct sctp_ulpq {
char pd_mode;
struct sctp_association *asoc;
struct sk_buff_head reasm;
+ struct sk_buff_head reasm_uo;
struct sk_buff_head lobby;
};
@@ -76,11 +77,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc);
void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn);
void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *, __u32);
-#endif /* __sctp_ulpqueue_h__ */
-
-
-
-
-
+__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
+ struct sk_buff_head *list, __u16 needed);
+#endif /* __sctp_ulpqueue_h__ */
diff --git a/include/net/sock.h b/include/net/sock.h
index 9155da422692..9a9047268d37 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2407,4 +2407,15 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
return *proto->sysctl_rmem;
}
+/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+ * Some wifi drivers need to tweak it to get more chunks.
+ * They can use this helper from their ndo_start_xmit()
+ */
+static inline void sk_pacing_shift_update(struct sock *sk, int val)
+{
+ if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ return;
+ sk->sk_pacing_shift = val;
+}
+
#endif /* _SOCK_H */
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 21d253c9a8c6..a2e9cbca5c9e 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,10 +8,8 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
- int tcfm_ifindex;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
- struct net *net;
struct list_head tcfm_list;
};
#define to_mirred(a) ((struct tcf_mirred *)a)
@@ -34,9 +32,9 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a)
return false;
}
-static inline int tcf_mirred_ifindex(const struct tc_action *a)
+static inline struct net_device *tcf_mirred_dev(const struct tc_action *a)
{
- return to_mirred(a)->tcfm_ifindex;
+ return rtnl_dereference(to_mirred(a)->tcfm_dev);
}
#endif /* __NET_TC_MIR_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6da880d2f022..6939e69d3c37 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1507,8 +1507,7 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
/* From tcp_fastopen.c */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- struct tcp_fastopen_cookie *cookie, int *syn_loss,
- unsigned long *last_syn_loss);
+ struct tcp_fastopen_cookie *cookie);
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_cookie *cookie, bool syn_lost,
u16 try_exp);
@@ -1546,7 +1545,7 @@ extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
-void tcp_fastopen_active_timeout_reset(void);
+void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
/* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive.
@@ -2011,10 +2010,12 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
struct bpf_sock_ops_kern sock_ops;
int ret;
- if (sk_fullsock(sk))
+ memset(&sock_ops, 0, sizeof(sock_ops));
+ if (sk_fullsock(sk)) {
+ sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
+ }
- memset(&sock_ops, 0, sizeof(sock_ops));
sock_ops.sk = sk;
sock_ops.op = op;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc28a98ce97c..1ec0c4760646 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c
/* A struct encoding bundle of transformations to apply to some set of flow.
*
- * dst->child points to the next element of bundle.
+ * xdst->child points to the next element of bundle.
* dst->xfrm points to an instanse of transformer.
*
* Due to unfortunate limitations of current routing cache, which we
@@ -984,6 +984,8 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
+ struct dst_entry *child;
+ struct dst_entry *path;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
@@ -994,7 +996,35 @@ struct xfrm_dst {
u32 path_cookie;
};
+static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
+{
#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
+
+ return xdst->path;
+ }
+#endif
+ return (struct dst_entry *) dst;
+}
+
+static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
+{
+#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+ return xdst->child;
+ }
+#endif
+ return NULL;
+}
+
+#ifdef CONFIG_XFRM
+static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child)
+{
+ xdst->child = child;
+}
+
static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
{
xfrm_pols_put(xdst->pols, xdst->num_pols);
@@ -1866,12 +1896,14 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
+ struct xfrm_dst *xdst;
if (!x || !x->type_offload)
return false;
- if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
- !dst->child->xfrm)
+ xdst = (struct xfrm_dst *) dst;
+ if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
+ !xdst->child->xfrm)
return true;
return false;
diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
index 1bee3e7fdf32..8ea966448b58 100644
--- a/include/trace/events/bridge.h
+++ b/include/trace/events/bridge.h
@@ -82,8 +82,8 @@ TRACE_EVENT(fdb_delete,
TP_fast_assign(
__assign_str(br_dev, br->dev->name);
__assign_str(dev, f->dst ? f->dst->dev->name : "null");
- memcpy(__entry->addr, f->addr.addr, ETH_ALEN);
- __entry->vid = f->vlan_id;
+ memcpy(__entry->addr, f->key.addr.addr, ETH_ALEN);
+ __entry->vid = f->key.vlan_id;
),
TP_printk("br_dev %s dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u",
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4c223ab30293..80d62e88590c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -941,6 +941,12 @@ struct bpf_sock_ops {
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
};
/* List of known BPF sock_ops operators.
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index ac71559314e7..44a0b675a6bc 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1686,6 +1686,7 @@ enum ethtool_reset_flags {
ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */
ETH_RESET_RAM = 1 << 7, /* RAM shared between
* multiple components */
+ ETH_RESET_AP = 1 << 8, /* Application processor */
ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to
* this interface */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 3ee3bf7c8526..87b7529fcdfe 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -47,6 +47,7 @@
#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */
+#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */
#define ETH_P_IP 0x0800 /* Internet Protocol packet */
#define ETH_P_X25 0x0805 /* CCITT X.25 */
#define ETH_P_ARP 0x0806 /* Address Resolution packet */
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 030d3e6d6029..fb38c1797131 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -57,6 +57,7 @@
*/
#define TUNSETVNETBE _IOW('T', 222, int)
#define TUNGETVNETBE _IOR('T', 223, int)
+#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index e68dadbd6d45..1b3d148c4560 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -137,6 +137,9 @@ enum {
IFLA_GRE_IGNORE_DF,
IFLA_GRE_FWMARK,
IFLA_GRE_ERSPAN_INDEX,
+ IFLA_GRE_ERSPAN_VER,
+ IFLA_GRE_ERSPAN_DIR,
+ IFLA_GRE_ERSPAN_HWID,
__IFLA_GRE_MAX,
};
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index d9adab32dbee..4c4db14786bd 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -125,6 +125,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_SOCKOPT_PEELOFF_FLAGS 122
#define SCTP_STREAM_SCHEDULER 123
#define SCTP_STREAM_SCHEDULER_VALUE 124
+#define SCTP_INTERLEAVING_SUPPORTED 125
/* PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000
@@ -459,6 +460,8 @@ struct sctp_pdapi_event {
__u32 pdapi_length;
__u32 pdapi_indication;
sctp_assoc_t pdapi_assoc_id;
+ __u32 pdapi_stream;
+ __u32 pdapi_seq;
};
enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, };
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d4593571c404..7afa92e9b409 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -216,6 +216,17 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_END] = "pkt_end",
};
+static void print_liveness(struct bpf_verifier_env *env,
+ enum bpf_reg_liveness live)
+{
+ if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
+ verbose(env, "_");
+ if (live & REG_LIVE_READ)
+ verbose(env, "r");
+ if (live & REG_LIVE_WRITTEN)
+ verbose(env, "w");
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
struct bpf_verifier_state *state)
{
@@ -228,7 +239,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
t = reg->type;
if (t == NOT_INIT)
continue;
- verbose(env, " R%d=%s", i, reg_type_str[t]);
+ verbose(env, " R%d", i);
+ print_liveness(env, reg->live);
+ verbose(env, "=%s", reg_type_str[t]);
if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
tnum_is_const(reg->var_off)) {
/* reg->off should be 0 for SCALAR_VALUE */
@@ -277,10 +290,13 @@ static void print_verifier_state(struct bpf_verifier_env *env,
}
}
for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] == STACK_SPILL)
- verbose(env, " fp%d=%s",
- -MAX_BPF_STACK + i * BPF_REG_SIZE,
+ if (state->stack[i].slot_type[0] == STACK_SPILL) {
+ verbose(env, " fp%d",
+ (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, state->stack[i].spilled_ptr.live);
+ verbose(env, "=%s",
reg_type_str[state->stack[i].spilled_ptr.type]);
+ }
}
verbose(env, "\n");
}
@@ -568,8 +584,8 @@ static void mark_reg_unknown(struct bpf_verifier_env *env,
{
if (WARN_ON(regno >= MAX_BPF_REG)) {
verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
- /* Something bad happened, let's kill all regs */
- for (regno = 0; regno < MAX_BPF_REG; regno++)
+ /* Something bad happened, let's kill all regs except FP */
+ for (regno = 0; regno < BPF_REG_FP; regno++)
__mark_reg_not_init(regs + regno);
return;
}
@@ -587,8 +603,8 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
{
if (WARN_ON(regno >= MAX_BPF_REG)) {
verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
- /* Something bad happened, let's kill all regs */
- for (regno = 0; regno < MAX_BPF_REG; regno++)
+ /* Something bad happened, let's kill all regs except FP */
+ for (regno = 0; regno < BPF_REG_FP; regno++)
__mark_reg_not_init(regs + regno);
return;
}
@@ -779,6 +795,11 @@ static int check_stack_read(struct bpf_verifier_env *env,
if (value_regno >= 0) {
/* restore register state from stack */
state->regs[value_regno] = state->stack[spi].spilled_ptr;
+ /* mark reg as written since spilled pointer state likely
+ * has its liveness marks cleared by is_state_visited()
+ * which resets stack/reg liveness for state transitions
+ */
+ state->regs[value_regno].live |= REG_LIVE_WRITTEN;
mark_stack_slot_read(state, spi);
}
return 0;
@@ -1244,9 +1265,9 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
}
/* Does this register contain a constant zero? */
-static bool register_is_null(struct bpf_reg_state reg)
+static bool register_is_null(struct bpf_reg_state *reg)
{
- return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
+ return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}
/* when register 'regno' is passed into function that will read 'access_size'
@@ -1259,31 +1280,31 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
struct bpf_call_arg_meta *meta)
{
+ struct bpf_reg_state *reg = cur_regs(env) + regno;
struct bpf_verifier_state *state = env->cur_state;
- struct bpf_reg_state *regs = state->regs;
int off, i, slot, spi;
- if (regs[regno].type != PTR_TO_STACK) {
+ if (reg->type != PTR_TO_STACK) {
/* Allow zero-byte read from NULL, regardless of pointer type */
if (zero_size_allowed && access_size == 0 &&
- register_is_null(regs[regno]))
+ register_is_null(reg))
return 0;
verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[regs[regno].type],
+ reg_type_str[reg->type],
reg_type_str[PTR_TO_STACK]);
return -EACCES;
}
/* Only allow fixed-offset stack reads */
- if (!tnum_is_const(regs[regno].var_off)) {
+ if (!tnum_is_const(reg->var_off)) {
char tn_buf[48];
- tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "invalid variable stack read R%d var_off=%s\n",
regno, tn_buf);
}
- off = regs[regno].off + regs[regno].var_off.value;
+ off = reg->off + reg->var_off.value;
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
@@ -1391,7 +1412,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
* passed in as argument, it's a SCALAR_VALUE type. Final test
* happens during stack boundary checking.
*/
- if (register_is_null(*reg) &&
+ if (register_is_null(reg) &&
arg_type == ARG_PTR_TO_MEM_OR_NULL)
/* final test in check_stack_boundary() */;
else if (!type_is_pkt_pointer(type) &&
@@ -2934,8 +2955,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (BPF_SRC(insn->code) == BPF_K &&
(opcode == BPF_JEQ || opcode == BPF_JNE) &&
dst_reg->type == SCALAR_VALUE &&
- tnum_equals_const(dst_reg->var_off, insn->imm)) {
- if (opcode == BPF_JEQ) {
+ tnum_is_const(dst_reg->var_off)) {
+ if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
+ (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
/* if (imm == imm) goto pc+off;
* only follow the goto, ignore fall-through
*/
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..a6c8529dd9b2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,7 +39,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
- once.o refcount.o usercopy.o errseq.o
+ once.o refcount.o usercopy.o errseq.o bucket_locks.o
obj-$(CONFIG_STRING_SELFTEST) += test_string.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
new file mode 100644
index 000000000000..266a97c5708b
--- /dev/null
+++ b/lib/bucket_locks.c
@@ -0,0 +1,54 @@
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/* Allocate an array of spinlocks to be accessed by a hash. Two arguments
+ * indicate the number of elements to allocate in the array. max_size
+ * gives the maximum number of elements to allocate. cpu_mult gives
+ * the number of locks per CPU to allocate. The size is rounded up
+ * to a power of 2 to be suitable as a hash table.
+ */
+
+int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
+ size_t max_size, unsigned int cpu_mult, gfp_t gfp)
+{
+ spinlock_t *tlocks = NULL;
+ unsigned int i, size;
+#if defined(CONFIG_PROVE_LOCKING)
+ unsigned int nr_pcpus = 2;
+#else
+ unsigned int nr_pcpus = num_possible_cpus();
+#endif
+
+ if (cpu_mult) {
+ nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
+ size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size);
+ } else {
+ size = max_size;
+ }
+
+ if (sizeof(spinlock_t) != 0) {
+ if (gfpflags_allow_blocking(gfp))
+ tlocks = kvmalloc(size * sizeof(spinlock_t), gfp);
+ else
+ tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
+ if (!tlocks)
+ return -ENOMEM;
+ for (i = 0; i < size; i++)
+ spin_lock_init(&tlocks[i]);
+ }
+
+ *locks = tlocks;
+ *locks_mask = size - 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(alloc_bucket_spinlocks);
+
+void free_bucket_spinlocks(spinlock_t *locks)
+{
+ kvfree(locks);
+}
+EXPORT_SYMBOL(free_bucket_spinlocks);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ddd7dde87c3c..3825c30aaa36 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#define ASSERT_RHT_MUTEX(HT)
#endif
-
-static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
- gfp_t gfp)
-{
- unsigned int i, size;
-#if defined(CONFIG_PROVE_LOCKING)
- unsigned int nr_pcpus = 2;
-#else
- unsigned int nr_pcpus = num_possible_cpus();
-#endif
-
- nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
- size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
-
- /* Never allocate more than 0.5 locks per bucket */
- size = min_t(unsigned int, size, tbl->size >> 1);
-
- if (tbl->nest)
- size = min(size, 1U << tbl->nest);
-
- if (sizeof(spinlock_t) != 0) {
- if (gfpflags_allow_blocking(gfp))
- tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp);
- else
- tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
- gfp);
- if (!tbl->locks)
- return -ENOMEM;
- for (i = 0; i < size; i++)
- spin_lock_init(&tbl->locks[i]);
- }
- tbl->locks_mask = size - 1;
-
- return 0;
-}
-
static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
@@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl)
if (tbl->nest)
nested_bucket_table_free(tbl);
- kvfree(tbl->locks);
+ free_bucket_spinlocks(tbl->locks);
kvfree(tbl);
}
@@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
gfp_t gfp)
{
struct bucket_table *tbl = NULL;
- size_t size;
+ size_t size, max_locks;
int i;
size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
@@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
tbl->size = size;
- if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
+ max_locks = size >> 1;
+ if (tbl->nest)
+ max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
+
+ if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
+ ht->p.locks_mul, gfp) < 0) {
bucket_table_free(tbl);
return NULL;
}
@@ -707,6 +676,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
iter->p = NULL;
iter->slot = 0;
iter->skip = 0;
+ iter->end_of_table = 0;
spin_lock(&ht->lock);
iter->walker.tbl =
@@ -732,7 +702,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter)
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
/**
- * rhashtable_walk_start - Start a hash table walk
+ * rhashtable_walk_start_check - Start a hash table walk
* @iter: Hash table iterator
*
* Start a hash table walk at the current iterator position. Note that we take
@@ -744,8 +714,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
* Returns -EAGAIN if resize event occured. Note that the iterator
* will rewind back to the beginning and you may use it immediately
* by calling rhashtable_walk_next.
+ *
+ * rhashtable_walk_start is defined as an inline variant that returns
+ * void. This is preferred in cases where the caller would ignore
+ * resize events and always continue.
*/
-int rhashtable_walk_start(struct rhashtable_iter *iter)
+int rhashtable_walk_start_check(struct rhashtable_iter *iter)
__acquires(RCU)
{
struct rhashtable *ht = iter->ht;
@@ -757,28 +731,26 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
list_del(&iter->walker.list);
spin_unlock(&ht->lock);
- if (!iter->walker.tbl) {
+ if (!iter->walker.tbl && !iter->end_of_table) {
iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
return -EAGAIN;
}
return 0;
}
-EXPORT_SYMBOL_GPL(rhashtable_walk_start);
+EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
/**
- * rhashtable_walk_next - Return the next object and advance the iterator
- * @iter: Hash table iterator
+ * __rhashtable_walk_find_next - Find the next element in a table (or the first
+ * one in case of a new walk).
*
- * Note that you must call rhashtable_walk_stop when you are finished
- * with the walk.
+ * @iter: Hash table iterator
*
- * Returns the next object or NULL when the end of the table is reached.
+ * Returns the found object or NULL when the end of the table is reached.
*
- * Returns -EAGAIN if resize event occured. Note that the iterator
- * will rewind back to the beginning and you may continue to use it.
+ * Returns -EAGAIN if resize event occurred.
*/
-void *rhashtable_walk_next(struct rhashtable_iter *iter)
+static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
{
struct bucket_table *tbl = iter->walker.tbl;
struct rhlist_head *list = iter->list;
@@ -786,13 +758,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
struct rhash_head *p = iter->p;
bool rhlist = ht->rhlist;
- if (p) {
- if (!rhlist || !(list = rcu_dereference(list->next))) {
- p = rcu_dereference(p->next);
- list = container_of(p, struct rhlist_head, rhead);
- }
- goto next;
- }
+ if (!tbl)
+ return NULL;
for (; iter->slot < tbl->size; iter->slot++) {
int skip = iter->skip;
@@ -836,13 +803,90 @@ next:
iter->slot = 0;
iter->skip = 0;
return ERR_PTR(-EAGAIN);
+ } else {
+ iter->end_of_table = true;
}
return NULL;
}
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
+
+ if (p) {
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
+
+ /* At the end of this slot, switch to next one and then find
+ * next entry from that point.
+ */
+ iter->skip = 0;
+ iter->slot++;
+ }
+
+ return __rhashtable_walk_find_next(iter);
+}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
/**
+ * rhashtable_walk_peek - Return the next object but don't advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_peek(struct rhashtable_iter *iter)
+{
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+
+ if (p)
+ return rht_obj(ht, ht->rhlist ? &list->rhead : p);
+
+ /* No object found in current iter, find next one in the table. */
+
+ if (iter->skip) {
+ /* A nonzero skip value points to the next entry in the table
+ * beyond that last one that was found. Decrement skip so
+ * we find the current value. __rhashtable_walk_find_next
+ * will restore the original value of skip assuming that
+ * the table hasn't changed.
+ */
+ iter->skip--;
+ }
+
+ return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
+
+/**
* rhashtable_walk_stop - Finish a hash table walk
* @iter: Hash table iterator
*
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 8e83cbdc049c..76d3667fdea2 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -162,11 +162,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries)
return;
}
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN) {
- pr_warn("Test failed: iterator failed: %d\n", err);
- return;
- }
+ rhashtable_walk_start(&hti);
while ((pos = rhashtable_walk_next(&hti))) {
if (PTR_ERR(pos) == -EAGAIN) {
diff --git a/net/atm/common.c b/net/atm/common.c
index 8a4f99114cd2..5763fd241dc3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -14,7 +14,7 @@
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
-#include <linux/time.h> /* struct timeval */
+#include <linux/time64.h> /* 64-bit time for seconds */
#include <linux/skbuff.h>
#include <linux/bitops.h>
#include <linux/init.h>
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 7c6a1cc760a2..31e0dcb970f8 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1089,7 +1089,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
msg->type = SND_MPOA_RES_RQST;
msg->content.in_info = entry->ctrl_info;
msg_to_mpoad(msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
mpc->in_ops->put(entry);
return;
}
@@ -1099,7 +1099,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
msg->type = SND_MPOA_RES_RQST;
msg->content.in_info = entry->ctrl_info;
msg_to_mpoad(msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
mpc->in_ops->put(entry);
return;
}
@@ -1175,8 +1175,9 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
}
entry->ctrl_info = msg->content.in_info;
- do_gettimeofday(&(entry->tv));
- do_gettimeofday(&(entry->reply_wait)); /* Used in refreshing func from now on */
+ entry->time = ktime_get_seconds();
+ /* Used in refreshing func from now on */
+ entry->reply_wait = ktime_get_seconds();
entry->refresh_time = 0;
ddprintk_cont("entry->shortcut = %p\n", entry->shortcut);
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index e01450bb32d6..4bb418313720 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -117,7 +117,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip,
memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
entry->ctrl_info.in_dst_ip = dst_ip;
- do_gettimeofday(&(entry->tv));
+ entry->time = ktime_get_seconds();
entry->retry_time = client->parameters.mpc_p4;
entry->count = 1;
entry->entry_state = INGRESS_INVALID;
@@ -148,7 +148,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
if (qos != NULL)
msg.qos = qos->qos;
msg_to_mpoad(&msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
entry->entry_state = INGRESS_RESOLVING;
}
if (entry->shortcut != NULL)
@@ -171,7 +171,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
if (qos != NULL)
msg.qos = qos->qos;
msg_to_mpoad(&msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
}
return CLOSED;
@@ -227,17 +227,16 @@ static void in_cache_remove_entry(in_cache_entry *entry,
static void clear_count_and_expired(struct mpoa_client *client)
{
in_cache_entry *entry, *next_entry;
- struct timeval now;
+ time64_t now;
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
write_lock_bh(&client->ingress_lock);
entry = client->in_cache;
while (entry != NULL) {
entry->count = 0;
next_entry = entry->next;
- if ((now.tv_sec - entry->tv.tv_sec)
- > entry->ctrl_info.holding_time) {
+ if ((now - entry->time) > entry->ctrl_info.holding_time) {
dprintk("holding time expired, ip = %pI4\n",
&entry->ctrl_info.in_dst_ip);
client->in_ops->remove_entry(entry, client);
@@ -253,35 +252,35 @@ static void check_resolving_entries(struct mpoa_client *client)
struct atm_mpoa_qos *qos;
in_cache_entry *entry;
- struct timeval now;
+ time64_t now;
struct k_message msg;
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
read_lock_bh(&client->ingress_lock);
entry = client->in_cache;
while (entry != NULL) {
if (entry->entry_state == INGRESS_RESOLVING) {
- if ((now.tv_sec - entry->hold_down.tv_sec) <
- client->parameters.mpc_p6) {
+
+ if ((now - entry->hold_down)
+ < client->parameters.mpc_p6) {
entry = entry->next; /* Entry in hold down */
continue;
}
- if ((now.tv_sec - entry->reply_wait.tv_sec) >
- entry->retry_time) {
+ if ((now - entry->reply_wait) > entry->retry_time) {
entry->retry_time = MPC_C1 * (entry->retry_time);
/*
* Retry time maximum exceeded,
* put entry in hold down.
*/
if (entry->retry_time > client->parameters.mpc_p5) {
- do_gettimeofday(&(entry->hold_down));
+ entry->hold_down = ktime_get_seconds();
entry->retry_time = client->parameters.mpc_p4;
entry = entry->next;
continue;
}
/* Ask daemon to send a resolution request. */
- memset(&(entry->hold_down), 0, sizeof(struct timeval));
+ memset(&entry->hold_down, 0, sizeof(time64_t));
msg.type = SND_MPOA_RES_RTRY;
memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN);
msg.content.in_info = entry->ctrl_info;
@@ -289,7 +288,7 @@ static void check_resolving_entries(struct mpoa_client *client)
if (qos != NULL)
msg.qos = qos->qos;
msg_to_mpoad(&msg, client);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
}
}
entry = entry->next;
@@ -300,18 +299,18 @@ static void check_resolving_entries(struct mpoa_client *client)
/* Call this every MPC-p5 seconds. */
static void refresh_entries(struct mpoa_client *client)
{
- struct timeval now;
+ time64_t now;
struct in_cache_entry *entry = client->in_cache;
ddprintk("refresh_entries\n");
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
read_lock_bh(&client->ingress_lock);
while (entry != NULL) {
if (entry->entry_state == INGRESS_RESOLVED) {
if (!(entry->refresh_time))
entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3;
- if ((now.tv_sec - entry->reply_wait.tv_sec) >
+ if ((now - entry->reply_wait) >
entry->refresh_time) {
dprintk("refreshing an entry.\n");
entry->entry_state = INGRESS_REFRESHING;
@@ -480,7 +479,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
entry->ctrl_info = msg->content.eg_info;
- do_gettimeofday(&(entry->tv));
+ entry->time = ktime_get_seconds();
entry->entry_state = EGRESS_RESOLVED;
dprintk("new_eg_cache_entry cache_id %u\n",
ntohl(entry->ctrl_info.cache_id));
@@ -495,7 +494,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
{
- do_gettimeofday(&(entry->tv));
+ entry->time = ktime_get_seconds();
entry->entry_state = EGRESS_RESOLVED;
entry->ctrl_info.holding_time = holding_time;
}
@@ -503,17 +502,16 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
static void clear_expired(struct mpoa_client *client)
{
eg_cache_entry *entry, *next_entry;
- struct timeval now;
+ time64_t now;
struct k_message msg;
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
write_lock_irq(&client->egress_lock);
entry = client->eg_cache;
while (entry != NULL) {
next_entry = entry->next;
- if ((now.tv_sec - entry->tv.tv_sec)
- > entry->ctrl_info.holding_time) {
+ if ((now - entry->time) > entry->ctrl_info.holding_time) {
msg.type = SND_EGRESS_PURGE;
msg.content.eg_info = entry->ctrl_info;
dprintk("egress_cache: holding time expired, cache_id = %u.\n",
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 6a266669ebf4..464c4c7f8d1f 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -2,6 +2,7 @@
#ifndef MPOA_CACHES_H
#define MPOA_CACHES_H
+#include <linux/time64.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/atm.h>
@@ -16,9 +17,9 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc);
typedef struct in_cache_entry {
struct in_cache_entry *next;
struct in_cache_entry *prev;
- struct timeval tv;
- struct timeval reply_wait;
- struct timeval hold_down;
+ time64_t time;
+ time64_t reply_wait;
+ time64_t hold_down;
uint32_t packets_fwded;
uint16_t entry_state;
uint32_t retry_time;
@@ -53,7 +54,7 @@ struct in_cache_ops{
typedef struct eg_cache_entry{
struct eg_cache_entry *next;
struct eg_cache_entry *prev;
- struct timeval tv;
+ time64_t time;
uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN];
struct atm_vcc *shortcut;
uint32_t packets_rcvd;
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 8a0c17e1c203..2212da9c2da2 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -8,7 +8,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
-#include <linux/time.h>
+#include <linux/ktime.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/atmmpc.h>
@@ -138,7 +138,7 @@ static int mpc_show(struct seq_file *m, void *v)
int i;
in_cache_entry *in_entry;
eg_cache_entry *eg_entry;
- struct timeval now;
+ time64_t now;
unsigned char ip_string[16];
if (v == SEQ_START_TOKEN) {
@@ -148,15 +148,17 @@ static int mpc_show(struct seq_file *m, void *v)
seq_printf(m, "\nInterface %d:\n\n", mpc->dev_num);
seq_printf(m, "Ingress Entries:\nIP address State Holding time Packets fwded VPI VCI\n");
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) {
+ unsigned long seconds_delta = now - in_entry->time;
+
sprintf(ip_string, "%pI4", &in_entry->ctrl_info.in_dst_ip);
seq_printf(m, "%-16s%s%-14lu%-12u",
ip_string,
ingress_state_string(in_entry->entry_state),
in_entry->ctrl_info.holding_time -
- (now.tv_sec-in_entry->tv.tv_sec),
+ seconds_delta,
in_entry->packets_fwded);
if (in_entry->shortcut)
seq_printf(m, " %-3d %-3d",
@@ -169,13 +171,14 @@ static int mpc_show(struct seq_file *m, void *v)
seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id State Holding time Packets recvd Latest IP addr VPI VCI\n");
for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) {
unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr;
+ unsigned long seconds_delta = now - eg_entry->time;
+
for (i = 0; i < ATM_ESA_LEN; i++)
seq_printf(m, "%02x", p[i]);
seq_printf(m, "\n%-16lu%s%-14lu%-15u",
(unsigned long)ntohl(eg_entry->ctrl_info.cache_id),
egress_state_string(eg_entry->entry_state),
- (eg_entry->ctrl_info.holding_time -
- (now.tv_sec-eg_entry->tv.tv_sec)),
+ (eg_entry->ctrl_info.holding_time - seconds_delta),
eg_entry->packets_rcvd);
/* latest IP address */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index af5b8c87f590..1285ca30ab0a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -125,9 +125,16 @@ static int br_dev_init(struct net_device *dev)
if (!br->stats)
return -ENOMEM;
+ err = br_fdb_hash_init(br);
+ if (err) {
+ free_percpu(br->stats);
+ return err;
+ }
+
err = br_vlan_init(br);
if (err) {
free_percpu(br->stats);
+ br_fdb_hash_fini(br);
return err;
}
@@ -135,6 +142,7 @@ static int br_dev_init(struct net_device *dev)
if (err) {
free_percpu(br->stats);
br_vlan_flush(br);
+ br_fdb_hash_fini(br);
}
br_set_lockdep_class(dev);
@@ -148,6 +156,7 @@ static void br_dev_uninit(struct net_device *dev)
br_multicast_dev_del(br);
br_multicast_uninit_stats(br);
br_vlan_flush(br);
+ br_fdb_hash_fini(br);
free_percpu(br->stats);
}
@@ -416,6 +425,7 @@ void br_dev_setup(struct net_device *dev)
br->dev = dev;
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
+ INIT_HLIST_HEAD(&br->fdb_list);
spin_lock_init(&br->hash_lock);
br->bridge_id.prio[0] = 0x80;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4ea5c8bbe286..dc87fbc9a23b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,14 +28,20 @@
#include <trace/events/bridge.h>
#include "br_private.h"
+static const struct rhashtable_params br_fdb_rht_params = {
+ .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode),
+ .key_offset = offsetof(struct net_bridge_fdb_entry, key),
+ .key_len = sizeof(struct net_bridge_fdb_key),
+ .automatic_shrinking = true,
+ .locks_mul = 1,
+};
+
static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *, int);
-static u32 fdb_salt __read_mostly;
-
int __init br_fdb_init(void)
{
br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
@@ -45,7 +51,6 @@ int __init br_fdb_init(void)
if (!br_fdb_cache)
return -ENOMEM;
- get_random_bytes(&fdb_salt, sizeof(fdb_salt));
return 0;
}
@@ -54,6 +59,15 @@ void br_fdb_fini(void)
kmem_cache_destroy(br_fdb_cache);
}
+int br_fdb_hash_init(struct net_bridge *br)
+{
+ return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params);
+}
+
+void br_fdb_hash_fini(struct net_bridge *br)
+{
+ rhashtable_destroy(&br->fdb_hash_tbl);
+}
/* if topology_changing then use forward_delay (default 15 sec)
* otherwise keep longer (default 5 minutes)
@@ -70,13 +84,6 @@ static inline int has_expired(const struct net_bridge *br,
time_before_eq(fdb->updated + hold_time(br), jiffies);
}
-static inline int br_mac_hash(const unsigned char *mac, __u16 vid)
-{
- /* use 1 byte of OUI and 3 bytes of NIC */
- u32 key = get_unaligned((u32 *)(mac + 2));
- return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1);
-}
-
static void fdb_rcu_free(struct rcu_head *head)
{
struct net_bridge_fdb_entry *ent
@@ -84,19 +91,18 @@ static void fdb_rcu_free(struct rcu_head *head)
kmem_cache_free(br_fdb_cache, ent);
}
-static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
+static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
const unsigned char *addr,
__u16 vid)
{
- struct net_bridge_fdb_entry *f;
+ struct net_bridge_fdb_key key;
WARN_ON_ONCE(!rcu_read_lock_held());
- hlist_for_each_entry_rcu(f, head, hlist)
- if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid)
- break;
+ key.vlan_id = vid;
+ memcpy(key.addr.addr, addr, sizeof(key.addr.addr));
- return f;
+ return rhashtable_lookup(tbl, &key, br_fdb_rht_params);
}
/* requires bridge hash_lock */
@@ -104,13 +110,12 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
const unsigned char *addr,
__u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
lockdep_assert_held_once(&br->hash_lock);
rcu_read_lock();
- fdb = fdb_find_rcu(head, addr, vid);
+ fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
rcu_read_unlock();
return fdb;
@@ -120,9 +125,7 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
const unsigned char *addr,
__u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
-
- return fdb_find_rcu(head, addr, vid);
+ return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
}
/* When a static FDB entry is added, the mac address from the entry is
@@ -175,9 +178,11 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
trace_fdb_delete(br, f);
if (f->is_static)
- fdb_del_hw_addr(br, f->addr.addr);
+ fdb_del_hw_addr(br, f->key.addr.addr);
- hlist_del_init_rcu(&f->hlist);
+ hlist_del_init_rcu(&f->fdb_node);
+ rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
+ br_fdb_rht_params);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
}
@@ -187,11 +192,11 @@ static void fdb_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
struct net_bridge_fdb_entry *f)
{
- const unsigned char *addr = f->addr.addr;
+ const unsigned char *addr = f->key.addr.addr;
struct net_bridge_vlan_group *vg;
const struct net_bridge_vlan *v;
struct net_bridge_port *op;
- u16 vid = f->vlan_id;
+ u16 vid = f->key.vlan_id;
/* Maybe another port has same hw addr? */
list_for_each_entry(op, &br->port_list, list) {
@@ -233,31 +238,23 @@ void br_fdb_find_delete_local(struct net_bridge *br,
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge_vlan_group *vg;
+ struct net_bridge_fdb_entry *f;
struct net_bridge *br = p->br;
struct net_bridge_vlan *v;
- int i;
spin_lock_bh(&br->hash_lock);
-
vg = nbp_vlan_group(p);
- /* Search all chains since old address/hash is unknown */
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct hlist_node *h;
- hlist_for_each(h, &br->hash[i]) {
- struct net_bridge_fdb_entry *f;
-
- f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (f->dst == p && f->is_local && !f->added_by_user) {
- /* delete old one */
- fdb_delete_local(br, p, f);
-
- /* if this port has no vlan information
- * configured, we can safely be done at
- * this point.
- */
- if (!vg || !vg->num_vlans)
- goto insert;
- }
+ hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
+ if (f->dst == p && f->is_local && !f->added_by_user) {
+ /* delete old one */
+ fdb_delete_local(br, p, f);
+
+ /* if this port has no vlan information
+ * configured, we can safely be done at
+ * this point.
+ */
+ if (!vg || !vg->num_vlans)
+ goto insert;
}
}
@@ -316,35 +313,32 @@ void br_fdb_cleanup(struct work_struct *work)
{
struct net_bridge *br = container_of(work, struct net_bridge,
gc_work.work);
+ struct net_bridge_fdb_entry *f = NULL;
unsigned long delay = hold_time(br);
unsigned long work_delay = delay;
unsigned long now = jiffies;
- int i;
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct net_bridge_fdb_entry *f;
- struct hlist_node *n;
+ /* this part is tricky, in order to avoid blocking learning and
+ * consequently forwarding, we rely on rcu to delete objects with
+ * delayed freeing allowing us to continue traversing
+ */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ unsigned long this_timer;
- if (!br->hash[i].first)
+ if (f->is_static || f->added_by_external_learn)
continue;
-
- spin_lock_bh(&br->hash_lock);
- hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
- unsigned long this_timer;
-
- if (f->is_static)
- continue;
- if (f->added_by_external_learn)
- continue;
- this_timer = f->updated + delay;
- if (time_after(this_timer, now))
- work_delay = min(work_delay, this_timer - now);
- else
+ this_timer = f->updated + delay;
+ if (time_after(this_timer, now)) {
+ work_delay = min(work_delay, this_timer - now);
+ } else {
+ spin_lock_bh(&br->hash_lock);
+ if (!hlist_unhashed(&f->fdb_node))
fdb_delete(br, f);
+ spin_unlock_bh(&br->hash_lock);
}
- spin_unlock_bh(&br->hash_lock);
- cond_resched();
}
+ rcu_read_unlock();
/* Cleanup minimum 10 milliseconds apart */
work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
@@ -354,16 +348,13 @@ void br_fdb_cleanup(struct work_struct *work)
/* Completely flush all dynamic entries in forwarding database.*/
void br_fdb_flush(struct net_bridge *br)
{
- int i;
+ struct net_bridge_fdb_entry *f;
+ struct hlist_node *tmp;
spin_lock_bh(&br->hash_lock);
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct net_bridge_fdb_entry *f;
- struct hlist_node *n;
- hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
- if (!f->is_static)
- fdb_delete(br, f);
- }
+ hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
+ if (!f->is_static)
+ fdb_delete(br, f);
}
spin_unlock_bh(&br->hash_lock);
}
@@ -377,27 +368,22 @@ void br_fdb_delete_by_port(struct net_bridge *br,
u16 vid,
int do_all)
{
- int i;
+ struct net_bridge_fdb_entry *f;
+ struct hlist_node *tmp;
spin_lock_bh(&br->hash_lock);
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct hlist_node *h, *g;
+ hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
+ if (f->dst != p)
+ continue;
- hlist_for_each_safe(h, g, &br->hash[i]) {
- struct net_bridge_fdb_entry *f
- = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (f->dst != p)
+ if (!do_all)
+ if (f->is_static || (vid && f->key.vlan_id != vid))
continue;
- if (!do_all)
- if (f->is_static || (vid && f->vlan_id != vid))
- continue;
-
- if (f->is_local)
- fdb_delete_local(br, p, f);
- else
- fdb_delete(br, f);
- }
+ if (f->is_local)
+ fdb_delete_local(br, p, f);
+ else
+ fdb_delete(br, f);
}
spin_unlock_bh(&br->hash_lock);
}
@@ -433,52 +419,48 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
int br_fdb_fillbuf(struct net_bridge *br, void *buf,
unsigned long maxnum, unsigned long skip)
{
- struct __fdb_entry *fe = buf;
- int i, num = 0;
struct net_bridge_fdb_entry *f;
+ struct __fdb_entry *fe = buf;
+ int num = 0;
memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
rcu_read_lock();
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
- if (num >= maxnum)
- goto out;
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (num >= maxnum)
+ break;
- if (has_expired(br, f))
- continue;
+ if (has_expired(br, f))
+ continue;
- /* ignore pseudo entry for local MAC address */
- if (!f->dst)
- continue;
+ /* ignore pseudo entry for local MAC address */
+ if (!f->dst)
+ continue;
- if (skip) {
- --skip;
- continue;
- }
+ if (skip) {
+ --skip;
+ continue;
+ }
- /* convert from internal format to API */
- memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN);
+ /* convert from internal format to API */
+ memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN);
- /* due to ABI compat need to split into hi/lo */
- fe->port_no = f->dst->port_no;
- fe->port_hi = f->dst->port_no >> 8;
+ /* due to ABI compat need to split into hi/lo */
+ fe->port_no = f->dst->port_no;
+ fe->port_hi = f->dst->port_no >> 8;
- fe->is_local = f->is_local;
- if (!f->is_static)
- fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
- ++fe;
- ++num;
- }
+ fe->is_local = f->is_local;
+ if (!f->is_static)
+ fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
+ ++fe;
+ ++num;
}
-
- out:
rcu_read_unlock();
return num;
}
-static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
+static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
struct net_bridge_port *source,
const unsigned char *addr,
__u16 vid,
@@ -489,16 +471,23 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
if (fdb) {
- memcpy(fdb->addr.addr, addr, ETH_ALEN);
+ memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
fdb->dst = source;
- fdb->vlan_id = vid;
+ fdb->key.vlan_id = vid;
fdb->is_local = is_local;
fdb->is_static = is_static;
fdb->added_by_user = 0;
fdb->added_by_external_learn = 0;
fdb->offloaded = 0;
fdb->updated = fdb->used = jiffies;
- hlist_add_head_rcu(&fdb->hlist, head);
+ if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
+ &fdb->rhnode,
+ br_fdb_rht_params)) {
+ kmem_cache_free(br_fdb_cache, fdb);
+ fdb = NULL;
+ } else {
+ hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
+ }
}
return fdb;
}
@@ -506,7 +495,6 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
if (!is_valid_ether_addr(addr))
@@ -524,7 +512,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
fdb_delete(br, fdb);
}
- fdb = fdb_create(head, source, addr, vid, 1, 1);
+ fdb = fdb_create(br, source, addr, vid, 1, 1);
if (!fdb)
return -ENOMEM;
@@ -548,7 +536,6 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, bool added_by_user)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
bool fdb_modified = false;
@@ -561,7 +548,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
source->state == BR_STATE_FORWARDING))
return;
- fdb = fdb_find_rcu(head, addr, vid);
+ fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
if (likely(fdb)) {
/* attempt to update an entry for a local interface */
if (unlikely(fdb->is_local)) {
@@ -590,14 +577,13 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
}
} else {
spin_lock(&br->hash_lock);
- if (likely(!fdb_find_rcu(head, addr, vid))) {
- fdb = fdb_create(head, source, addr, vid, 0, 0);
- if (fdb) {
- if (unlikely(added_by_user))
- fdb->added_by_user = 1;
- trace_br_fdb_update(br, source, addr, vid, added_by_user);
- fdb_notify(br, fdb, RTM_NEWNEIGH);
- }
+ fdb = fdb_create(br, source, addr, vid, 0, 0);
+ if (fdb) {
+ if (unlikely(added_by_user))
+ fdb->added_by_user = 1;
+ trace_br_fdb_update(br, source, addr, vid,
+ added_by_user);
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
}
/* else we lose race and someone else inserts
* it first, don't bother updating
@@ -646,7 +632,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (fdb->added_by_external_learn)
ndm->ndm_flags |= NTF_EXT_LEARNED;
- if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
+ if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
goto nla_put_failure;
if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
goto nla_put_failure;
@@ -657,7 +643,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
- if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
+ if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
+ &fdb->key.vlan_id))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -711,54 +698,48 @@ int br_fdb_dump(struct sk_buff *skb,
int *idx)
{
struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_fdb_entry *f;
int err = 0;
- int i;
if (!(dev->priv_flags & IFF_EBRIDGE))
- goto out;
+ return err;
if (!filter_dev) {
err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
if (err < 0)
- goto out;
+ return err;
}
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct net_bridge_fdb_entry *f;
-
- hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
-
- if (*idx < cb->args[2])
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (*idx < cb->args[2])
+ goto skip;
+ if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) {
+ if (filter_dev != dev)
goto skip;
-
- if (filter_dev &&
- (!f->dst || f->dst->dev != filter_dev)) {
- if (filter_dev != dev)
- goto skip;
- /* !f->dst is a special case for bridge
- * It means the MAC belongs to the bridge
- * Therefore need a little more filtering
- * we only want to dump the !f->dst case
- */
- if (f->dst)
- goto skip;
- }
- if (!filter_dev && f->dst)
+ /* !f->dst is a special case for bridge
+ * It means the MAC belongs to the bridge
+ * Therefore need a little more filtering
+ * we only want to dump the !f->dst case
+ */
+ if (f->dst)
goto skip;
-
- err = fdb_fill_info(skb, br, f,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI);
- if (err < 0)
- goto out;
-skip:
- *idx += 1;
}
+ if (!filter_dev && f->dst)
+ goto skip;
+
+ err = fdb_fill_info(skb, br, f,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH,
+ NLM_F_MULTI);
+ if (err < 0)
+ break;
+skip:
+ *idx += 1;
}
+ rcu_read_unlock();
-out:
return err;
}
@@ -766,7 +747,6 @@ out:
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
bool modified = false;
@@ -787,7 +767,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- fdb = fdb_create(head, source, addr, vid, 0, 0);
+ fdb = fdb_create(br, source, addr, vid, 0, 0);
if (!fdb)
return -ENOMEM;
@@ -1012,65 +992,60 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
{
- struct net_bridge_fdb_entry *fdb, *tmp;
- int i;
+ struct net_bridge_fdb_entry *f, *tmp;
int err;
ASSERT_RTNL();
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry(fdb, &br->hash[i], hlist) {
- /* We only care for static entries */
- if (!fdb->is_static)
- continue;
-
- err = dev_uc_add(p->dev, fdb->addr.addr);
- if (err)
- goto rollback;
- }
+ /* the key here is that static entries change only under rtnl */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ /* We only care for static entries */
+ if (!f->is_static)
+ continue;
+ err = dev_uc_add(p->dev, f->key.addr.addr);
+ if (err)
+ goto rollback;
}
- return 0;
+done:
+ rcu_read_unlock();
-rollback:
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry(tmp, &br->hash[i], hlist) {
- /* If we reached the fdb that failed, we can stop */
- if (tmp == fdb)
- break;
-
- /* We only care for static entries */
- if (!tmp->is_static)
- continue;
+ return err;
- dev_uc_del(p->dev, tmp->addr.addr);
- }
+rollback:
+ hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
+ /* We only care for static entries */
+ if (!tmp->is_static)
+ continue;
+ if (tmp == f)
+ break;
+ dev_uc_del(p->dev, tmp->key.addr.addr);
}
- return err;
+
+ goto done;
}
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
{
- struct net_bridge_fdb_entry *fdb;
- int i;
+ struct net_bridge_fdb_entry *f;
ASSERT_RTNL();
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) {
- /* We only care for static entries */
- if (!fdb->is_static)
- continue;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ /* We only care for static entries */
+ if (!f->is_static)
+ continue;
- dev_uc_del(p->dev, fdb->addr.addr);
- }
+ dev_uc_del(p->dev, f->key.addr.addr);
}
+ rcu_read_unlock();
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid)
{
struct net_bridge_fdb_entry *fdb;
- struct hlist_head *head;
bool modified = false;
int err = 0;
@@ -1078,10 +1053,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
spin_lock_bh(&br->hash_lock);
- head = &br->hash[br_mac_hash(addr, vid)];
fdb = br_fdb_find(br, addr, vid);
if (!fdb) {
- fdb = fdb_create(head, p, addr, vid, 0, 0);
+ fdb = fdb_create(br, p, addr, vid, 0, 0);
if (!fdb) {
err = -ENOMEM;
goto err_unlock;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b0f4c734900b..6d9f48bd374a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -760,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
void br_mdb_init(void)
{
- rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
- rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
}
void br_mdb_uninit(void)
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 20cbb727df4d..8e2d7cfa4e16 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -78,7 +78,6 @@ void br_netfilter_rtable_init(struct net_bridge *br)
atomic_set(&rt->dst.__refcnt, 1);
rt->dst.dev = br->dev;
- rt->dst.path = &rt->dst;
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1312b8d20ec3..80559fd11b7e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -168,12 +168,17 @@ struct net_bridge_vlan_group {
u16 pvid;
};
+struct net_bridge_fdb_key {
+ mac_addr addr;
+ u16 vlan_id;
+};
+
struct net_bridge_fdb_entry {
- struct hlist_node hlist;
+ struct rhash_head rhnode;
struct net_bridge_port *dst;
- mac_addr addr;
- __u16 vlan_id;
+ struct net_bridge_fdb_key key;
+ struct hlist_node fdb_node;
unsigned char is_local:1,
is_static:1,
added_by_user:1,
@@ -315,7 +320,7 @@ struct net_bridge {
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
- struct hlist_head hash[BR_HASH_SIZE];
+ struct rhashtable fdb_hash_tbl;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
union {
struct rtable fake_rtable;
@@ -405,6 +410,7 @@ struct net_bridge {
int offload_fwd_mark;
#endif
bool neigh_suppress_enabled;
+ struct hlist_head fdb_list;
};
struct br_input_skb_cb {
@@ -515,6 +521,8 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
/* br_fdb.c */
int br_fdb_init(void);
void br_fdb_fini(void);
+int br_fdb_hash_init(struct net_bridge *br);
+void br_fdb_hash_fini(struct net_bridge *br);
void br_fdb_flush(struct net_bridge *br);
void br_fdb_find_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 9700e0f3307b..ee775f4ff76c 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -121,13 +121,13 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
switch (type) {
case RTM_DELNEIGH:
- br_switchdev_fdb_call_notifiers(false, fdb->addr.addr,
- fdb->vlan_id,
+ br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
+ fdb->key.vlan_id,
fdb->dst->dev);
break;
case RTM_NEWNEIGH:
- br_switchdev_fdb_call_notifiers(true, fdb->addr.addr,
- fdb->vlan_id,
+ br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
+ fdb->key.vlan_id,
fdb->dst->dev);
break;
}
diff --git a/net/can/gw.c b/net/can/gw.c
index 73a02af4b5d7..398dd0395ad9 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1014,6 +1014,8 @@ static struct pernet_operations cangw_pernet_ops = {
static __init int cgw_module_init(void)
{
+ int ret;
+
/* sanitize given module parameter */
max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS);
@@ -1031,15 +1033,19 @@ static __init int cgw_module_init(void)
notifier.notifier_call = cgw_notifier;
register_netdevice_notifier(&notifier);
- if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
+ NULL, cgw_dump_jobs, 0);
+ if (ret) {
unregister_netdevice_notifier(&notifier);
kmem_cache_destroy(cgw_cache);
return -ENOBUFS;
}
- /* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
- __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
+ /* Only the first call to rtnl_register_module can fail */
+ rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+ cgw_create_job, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+ cgw_remove_job, NULL, 0);
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index f47e96b62308..b0eee49a2489 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2803,7 +2803,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path)))
+ if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -3162,6 +3162,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
int rc;
qdisc_calculate_pkt_len(skb, q);
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ } else {
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ __qdisc_run(q);
+ }
+
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
+ return rc;
+ }
+
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3192,9 +3207,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
- } else
- qdisc_run_end(q);
+ }
+ qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
@@ -3204,6 +3219,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
+ qdisc_run_end(q);
}
}
spin_unlock(root_lock);
@@ -4143,19 +4159,22 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
while (head) {
struct Qdisc *q = head;
- spinlock_t *root_lock;
+ spinlock_t *root_lock = NULL;
head = head->next_sched;
- root_lock = qdisc_lock(q);
- spin_lock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK)) {
+ root_lock = qdisc_lock(q);
+ spin_lock(root_lock);
+ }
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q);
- spin_unlock(root_lock);
+ if (root_lock)
+ spin_unlock(root_lock);
}
}
}
@@ -7073,17 +7092,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
+void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+ struct netdev_bpf *xdp)
{
- struct netdev_bpf xdp;
-
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
+ memset(xdp, 0, sizeof(*xdp));
+ xdp->command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0);
- if (prog_id)
- *prog_id = xdp.prog_id;
+ WARN_ON(bpf_op(dev, xdp) < 0);
+}
+
+static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+{
+ struct netdev_bpf xdp;
+
+ __dev_xdp_query(dev, bpf_op, &xdp);
return xdp.prog_attached;
}
@@ -7106,6 +7129,27 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
return bpf_op(dev, &xdp);
}
+static void dev_xdp_uninstall(struct net_device *dev)
+{
+ struct netdev_bpf xdp;
+ bpf_op_t ndo_bpf;
+
+ /* Remove generic XDP */
+ WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+
+ /* Remove from the driver */
+ ndo_bpf = dev->netdev_ops->ndo_bpf;
+ if (!ndo_bpf)
+ return;
+
+ __dev_xdp_query(dev, ndo_bpf, &xdp);
+ if (xdp.prog_attached == XDP_ATTACHED_NONE)
+ return;
+
+ /* Program removal should always succeed */
+ WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -7134,10 +7178,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, bpf_op, NULL))
+ __dev_xdp_attached(dev, bpf_op))
return -EBUSY;
prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@ -7236,6 +7280,7 @@ static void rollback_registered_many(struct list_head *head)
/* Shutdown queueing discipline. */
dev_shutdown(dev);
+ dev_xdp_uninstall(dev);
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
@@ -8195,7 +8240,6 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
- struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
@@ -8214,12 +8258,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
- prog = rcu_dereference_protected(dev->xdp_prog, 1);
- if (prog) {
- bpf_prog_put(prog);
- static_key_slow_dec(&generic_xdp_needed);
- }
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/dst.c b/net/core/dst.c
index 662a2d4a3d19..007aa0b08291 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
+#include <net/xfrm.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -62,15 +63,12 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
unsigned short flags)
{
- dst->child = NULL;
dst->dev = dev;
if (dev)
dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
- dst->path = dst;
- dst->from = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -88,7 +86,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
- dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
}
@@ -116,12 +113,17 @@ EXPORT_SYMBOL(dst_alloc);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
- struct dst_entry *child;
+ struct dst_entry *child = NULL;
smp_rmb();
- child = dst->child;
+#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+ child = xdst->child;
+ }
+#endif
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..754abe1041b7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3013,6 +3013,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
if (flags & BPF_F_DONT_FRAGMENT)
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3026,8 +3028,6 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
- if (flags & BPF_F_ZERO_CSUM_TX)
- info->key.tun_flags &= ~TUNNEL_CSUM;
}
return 0;
@@ -4437,6 +4437,42 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_num));
break;
+
+ case offsetof(struct bpf_sock_ops, is_fullsock):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern,
+ is_fullsock),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ is_fullsock));
+ break;
+
+/* Helper macro for adding read access to tcp_sock fields. */
+#define SOCK_OPS_GET_TCP32(FIELD_NAME) \
+ do { \
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \
+ offsetof(struct tcp_sock, FIELD_NAME)); \
+ } while (0)
+
+ case offsetof(struct bpf_sock_ops, snd_cwnd):
+ SOCK_OPS_GET_TCP32(snd_cwnd);
+ break;
+
+ case offsetof(struct bpf_sock_ops, srtt_us):
+ SOCK_OPS_GET_TCP32(srtt_us);
+ break;
}
return insn - insn_buf;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15ce30063765..cc75488d3653 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -133,10 +133,10 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
ctrl->addr_type = type;
}
-static void
-__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
- struct flow_dissector *flow_dissector,
- void *target_container)
+void
+skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
{
struct ip_tunnel_info *info;
struct ip_tunnel_key *key;
@@ -212,6 +212,7 @@ __skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
tp->dst = key->tp_dst;
}
}
+EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
@@ -576,9 +577,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
- __skb_flow_dissect_tunnel_info(skb, flow_dissector,
- target_container);
-
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 87f28557b329..b2b2323bdc84 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -252,10 +252,10 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
}
}
-static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu,
- const struct gnet_stats_queue *q,
- __u32 qlen)
+void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu,
+ const struct gnet_stats_queue *q,
+ __u32 qlen)
{
if (cpu) {
__gnet_stats_copy_queue_cpu(qstats, cpu);
@@ -269,6 +269,7 @@ static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
qstats->qlen = qlen;
}
+EXPORT_SYMBOL(__gnet_stats_copy_queue);
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f95a15086225..b9ce241cd28c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -399,7 +399,7 @@ struct pktgen_dev {
__u8 ipsmode; /* IPSEC mode (config) */
__u8 ipsproto; /* IPSEC type (config) */
__u32 spi;
- struct dst_entry dst;
+ struct xfrm_dst xdst;
struct dst_ops dstops;
#endif
char result[512];
@@ -2609,7 +2609,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
* supports both transport/tunnel mode + ESP/AH type.
*/
if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
- skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF;
+ skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;
rcu_read_lock_bh();
err = x->outer_mode->output(x, skb);
@@ -3742,10 +3742,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
* performance under such circumstance.
*/
pkt_dev->dstops.family = AF_INET;
- pkt_dev->dst.dev = pkt_dev->odev;
- dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false);
- pkt_dev->dst.child = &pkt_dev->dst;
- pkt_dev->dst.ops = &pkt_dev->dstops;
+ pkt_dev->xdst.u.dst.dev = pkt_dev->odev;
+ dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false);
+ pkt_dev->xdst.child = &pkt_dev->xdst.u.dst;
+ pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops;
#endif
return add_dev_to_thread(t, pkt_dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..c688dc564b11 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,9 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
+ struct module *owner;
unsigned int flags;
+ struct rcu_head rcu;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
-static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -144,72 +145,127 @@ static inline int rtm_msgindex(int msgtype)
return msgindex;
}
-/**
- * __rtnl_register - Register a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
- *
- * Registers the specified function pointers (at least one of them has
- * to be non-NULL) to be called whenever a request message for the
- * specified protocol family and message type is received.
- *
- * The special protocol family PF_UNSPEC may be used to define fallback
- * function pointers for the case when no entry for the specific protocol
- * family exists.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
+static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
+{
+ struct rtnl_link **tab;
+
+ if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
+ protocol = PF_UNSPEC;
+
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
+ if (!tab)
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);
+
+ return tab[msgtype];
+}
+
+static int rtnl_register_internal(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
{
- struct rtnl_link *tab;
+ struct rtnl_link *link, *old;
+ struct rtnl_link __rcu **tab;
int msgindex;
+ int ret = -ENOBUFS;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
+ rtnl_lock();
+ tab = rtnl_msg_handlers[protocol];
if (tab == NULL) {
- tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
- if (tab == NULL)
- return -ENOBUFS;
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
+ if (!tab)
+ goto unlock;
+ /* ensures we see the 0 stores */
rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
+ old = rtnl_dereference(tab[msgindex]);
+ if (old) {
+ link = kmemdup(old, sizeof(*old), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ } else {
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ }
+
+ WARN_ON(link->owner && link->owner != owner);
+ link->owner = owner;
+
+ WARN_ON(doit && link->doit && link->doit != doit);
if (doit)
- tab[msgindex].doit = doit;
+ link->doit = doit;
+ WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
if (dumpit)
- tab[msgindex].dumpit = dumpit;
- tab[msgindex].flags |= flags;
+ link->dumpit = dumpit;
- return 0;
+ link->flags |= flags;
+
+ /* publish protocol:msgtype */
+ rcu_assign_pointer(tab[msgindex], link);
+ ret = 0;
+ if (old)
+ kfree_rcu(old, rcu);
+unlock:
+ rtnl_unlock();
+ return ret;
+}
+
+/**
+ * rtnl_register_module - Register a rtnetlink message type
+ *
+ * @owner: module registering the hook (THIS_MODULE)
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ *
+ * Like rtnl_register, but for use by removable modules.
+ */
+int rtnl_register_module(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
+{
+ return rtnl_register_internal(owner, protocol, msgtype,
+ doit, dumpit, flags);
}
-EXPORT_SYMBOL_GPL(__rtnl_register);
+EXPORT_SYMBOL_GPL(rtnl_register_module);
/**
* rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
*
- * Identical to __rtnl_register() but panics on failure. This is useful
- * as failure of this function is very unlikely, it can only happen due
- * to lack of memory when allocating the chain to store all message
- * handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continuing.
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
- panic("Unable to register rtnetlink message handler, "
- "protocol = %d, message type = %d\n",
- protocol, msgtype);
+ int err;
+
+ err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
+ flags);
+ if (err)
+ pr_err("Unable to register rtnetlink message handler, "
+ "protocol = %d, message type = %d\n", protocol, msgtype);
}
-EXPORT_SYMBOL_GPL(rtnl_register);
/**
* rtnl_unregister - Unregister a rtnetlink message type
@@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/
int rtnl_unregister(int protocol, int msgtype)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
- if (!handlers) {
+ tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ if (!tab) {
rtnl_unlock();
return -ENOENT;
}
- handlers[msgindex].doit = NULL;
- handlers[msgindex].dumpit = NULL;
- handlers[msgindex].flags = 0;
+ link = tab[msgindex];
+ rcu_assign_pointer(tab[msgindex], NULL);
rtnl_unlock();
+ kfree_rcu(link, rcu);
+
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_unregister);
@@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
+ int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ tab = rtnl_msg_handlers[protocol];
RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+ for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
+ link = tab[msgindex];
+ if (!link)
+ continue;
+
+ rcu_assign_pointer(tab[msgindex], NULL);
+ kfree_rcu(link, rcu);
+ }
rtnl_unlock();
synchronize_net();
- while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
- schedule();
- kfree(handlers);
+ kfree(tab);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
@@ -1261,6 +1325,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct bpf_prog *generic_xdp_prog;
+ struct netdev_bpf xdp;
ASSERT_RTNL();
@@ -1273,7 +1338,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
+ __dev_xdp_query(dev, ops->ndo_bpf, &xdp);
+ *prog_id = xdp.prog_id;
+
+ return xdp.prog_attached;
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1569,6 +1637,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
@@ -2219,6 +2289,34 @@ static int do_setlink(const struct sk_buff *skb,
}
}
+ if (tb[IFLA_GSO_MAX_SIZE]) {
+ u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
+
+ if (max_size > GSO_MAX_SIZE) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_size ^ max_size) {
+ netif_set_gso_max_size(dev, max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
+ if (tb[IFLA_GSO_MAX_SEGS]) {
+ u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
+
+ if (max_segs > GSO_MAX_SEGS) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_segs ^ max_segs) {
+ dev->gso_max_segs = max_segs;
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
@@ -2583,6 +2681,10 @@ struct net_device *rtnl_create_link(struct net *net,
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ if (tb[IFLA_GSO_MAX_SIZE])
+ netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
+ if (tb[IFLA_GSO_MAX_SEGS])
+ dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
return dev;
}
@@ -2973,18 +3075,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = 1;
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
+ struct rtnl_link **tab;
int type = cb->nlh->nlmsg_type-RTM_BASE;
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
rtnl_dumpit_func dumpit;
if (idx < s_idx || idx == PF_PACKET)
continue;
- handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
- if (!handlers)
+ if (type < 0 || type >= RTM_NR_MSGTYPES)
continue;
- dumpit = READ_ONCE(handlers[type].dumpit);
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
+ if (!tab)
+ continue;
+
+ link = tab[type];
+ if (!link)
+ continue;
+
+ dumpit = link->dumpit;
if (!dumpit)
continue;
@@ -4314,7 +4424,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
+ struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
@@ -4338,79 +4449,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (family >= ARRAY_SIZE(rtnl_msg_handlers))
- family = PF_UNSPEC;
-
rcu_read_lock();
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- if (!handlers) {
- family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- }
-
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u16 min_dump_alloc = 0;
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
- if (!handlers)
- goto err_unlock;
-
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit)
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit)
goto err_unlock;
}
-
- refcount_inc(&rtnl_msg_handlers_ref[family]);
+ owner = link->owner;
+ dumpit = link->dumpit;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
+ err = 0;
+ /* need to do this before rcu_read_unlock() */
+ if (!try_module_get(owner))
+ err = -EPROTONOSUPPORT;
+
rcu_read_unlock();
rtnl = net->rtnl;
- {
+ if (err == 0) {
struct netlink_dump_control c = {
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
+ .module = owner,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
+ /* netlink_dump_start() will keep a reference on
+ * module if dump is still in progress.
+ */
+ module_put(owner);
}
- refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
- doit = READ_ONCE(handlers[type].doit);
- if (!doit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->doit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ link = rtnl_get_link(PF_UNSPEC, type);
+ if (!link || !link->doit)
+ goto out_unlock;
}
- flags = READ_ONCE(handlers[type].flags);
+ owner = link->owner;
+ if (!try_module_get(owner)) {
+ err = -EPROTONOSUPPORT;
+ goto out_unlock;
+ }
+
+ flags = link->flags;
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
- refcount_inc(&rtnl_msg_handlers_ref[family]);
- doit = READ_ONCE(handlers[type].doit);
+ doit = link->doit;
rcu_read_unlock();
if (doit)
err = doit(skb, nlh, extack);
- refcount_dec(&rtnl_msg_handlers_ref[family]);
+ module_put(owner);
return err;
}
-
rcu_read_unlock();
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[family]);
- if (handlers) {
- doit = READ_ONCE(handlers[type].doit);
- if (doit)
- err = doit(skb, nlh, extack);
- }
+ link = rtnl_get_link(family, type);
+ if (link && link->doit)
+ err = link->doit(skb, nlh, extack);
rtnl_unlock();
+
+ module_put(owner);
+
+ return err;
+
+out_unlock:
+ rcu_read_unlock();
return err;
err_unlock:
@@ -4498,11 +4615,6 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
- refcount_set(&rtnl_msg_handlers_ref[i], 1);
-
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5eeb1d20cc38..c5bb52bc73a1 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -235,7 +235,9 @@ struct sock *reuseport_select_sock(struct sock *sk,
if (prog && skb)
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
- else
+
+ /* no bpf or invalid bpf result: fall back to hash usage */
+ if (!sk2)
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
}
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 178bb9833311..37ccbe62eb1a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -63,9 +63,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
*/
local_bh_disable();
inet_twsk_schedule(tw, timeo);
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
- inet_twsk_put(tw);
+ /* Linkage updates.
+ * Note that access to tw after this point is illegal.
+ */
+ inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 9153247dad28..d1885cf59319 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1418,9 +1418,12 @@ void __init dn_dev_init(void)
dn_dev_devices_on();
- rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0);
- rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0);
- rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
+ dn_nl_newaddr, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
+ dn_nl_deladdr, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
+ NULL, dn_nl_dump_ifaddr, 0);
proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index b37a1b833c77..fce94cbd4378 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -792,8 +792,10 @@ void __init dn_fib_init(void)
register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
- rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0);
- rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
+ dn_fib_rtm_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
+ dn_fib_rtm_delroute, NULL, 0);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 324cb9f2f551..73160d4aebbe 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -199,11 +199,11 @@ static void dn_dst_check_expire(struct timer_list *unused)
lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) > 1 ||
(now - rt->dst.lastuse) < expire) {
- rtp = &rt->dst.dn_next;
+ rtp = &rt->dn_next;
continue;
}
- *rtp = rt->dst.dn_next;
- rt->dst.dn_next = NULL;
+ *rtp = rt->dn_next;
+ rt->dn_next = NULL;
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
@@ -233,11 +233,11 @@ static int dn_dst_gc(struct dst_ops *ops)
lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) > 1 ||
(now - rt->dst.lastuse) < expire) {
- rtp = &rt->dst.dn_next;
+ rtp = &rt->dn_next;
continue;
}
- *rtp = rt->dst.dn_next;
- rt->dst.dn_next = NULL;
+ *rtp = rt->dn_next;
+ rt->dn_next = NULL;
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
break;
@@ -333,8 +333,8 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
if (compare_keys(&rth->fld, &rt->fld)) {
/* Put it first */
- *rthp = rth->dst.dn_next;
- rcu_assign_pointer(rth->dst.dn_next,
+ *rthp = rth->dn_next;
+ rcu_assign_pointer(rth->dn_next,
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
@@ -345,10 +345,10 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
*rp = rth;
return 0;
}
- rthp = &rth->dst.dn_next;
+ rthp = &rth->dn_next;
}
- rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
+ rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
dst_hold_and_use(&rt->dst, now);
@@ -369,8 +369,8 @@ static void dn_run_flush(struct timer_list *unused)
goto nothing_to_declare;
for(; rt; rt = next) {
- next = rcu_dereference_raw(rt->dst.dn_next);
- RCU_INIT_POINTER(rt->dst.dn_next, NULL);
+ next = rcu_dereference_raw(rt->dn_next);
+ RCU_INIT_POINTER(rt->dn_next, NULL);
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
@@ -1183,6 +1183,7 @@ make_route:
if (rt == NULL)
goto e_nobufs;
+ rt->dn_next = NULL;
memset(&rt->fld, 0, sizeof(rt->fld));
rt->fld.saddr = oldflp->saddr;
rt->fld.daddr = oldflp->daddr;
@@ -1252,7 +1253,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
if (!(flags & MSG_TRYHARD)) {
rcu_read_lock_bh();
for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
- rt = rcu_dereference_bh(rt->dst.dn_next)) {
+ rt = rcu_dereference_bh(rt->dn_next)) {
if ((flp->daddr == rt->fld.daddr) &&
(flp->saddr == rt->fld.saddr) &&
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
@@ -1448,6 +1449,7 @@ make_route:
if (rt == NULL)
goto e_nobufs;
+ rt->dn_next = NULL;
memset(&rt->fld, 0, sizeof(rt->fld));
rt->rt_saddr = fld.saddr;
rt->rt_daddr = fld.daddr;
@@ -1529,7 +1531,7 @@ static int dn_route_input(struct sk_buff *skb)
rcu_read_lock();
for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
- rt = rcu_dereference(rt->dst.dn_next)) {
+ rt = rcu_dereference(rt->dn_next)) {
if ((rt->fld.saddr == cb->src) &&
(rt->fld.daddr == cb->dst) &&
(rt->fld.flowidn_oif == 0) &&
@@ -1749,7 +1751,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock_bh();
for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
rt;
- rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
+ rt = rcu_dereference_bh(rt->dn_next), idx++) {
if (idx < s_idx)
continue;
skb_dst_set(skb, dst_clone(&rt->dst));
@@ -1795,7 +1797,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
{
struct dn_rt_cache_iter_state *s = seq->private;
- rt = rcu_dereference_bh(rt->dst.dn_next);
+ rt = rcu_dereference_bh(rt->dn_next);
while (!rt) {
rcu_read_unlock_bh();
if (--s->bucket < 0)
@@ -1921,11 +1923,11 @@ void __init dn_route_init(void)
&dn_rt_cache_seq_fops);
#ifdef CONFIG_DECNET_ROUTER
- rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_fib_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
+ dn_cache_getroute, dn_fib_dump, 0);
#else
- rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_cache_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
+ dn_cache_getroute, dn_cache_dump, 0);
#endif
}
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 03c3bdf25468..bbf2c82cf7b2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -16,6 +16,15 @@ config NET_DSA
if NET_DSA
+config NET_DSA_LEGACY
+ bool "Support for older platform device and Device Tree registration"
+ default y
+ ---help---
+ Say Y if you want to enable support for the older platform device and
+ deprecated Device Tree binding registration.
+
+ This feature is scheduled for removal in 4.17.
+
# tagging formats
config NET_DSA_TAG_BRCM
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0e13c1f95d13..9e4d3536f977 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
+dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 1e287420ff49..21f9bed11988 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -241,7 +241,7 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
for (port = 0; port < ds->num_ports; port++) {
dp = &ds->ports[port];
- if (dsa_port_is_user(dp))
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
dp->cpu_dp = dst->cpu_dp;
}
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7d036696e8c4..b03665e8fb4e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -97,8 +97,17 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
bool dsa_schedule_work(struct work_struct *work);
/* legacy.c */
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
int dsa_legacy_register(void);
void dsa_legacy_unregister(void);
+#else
+static inline int dsa_legacy_register(void)
+{
+ return -ENODEV;
+}
+
+static inline void dsa_legacy_unregister(void) { }
+#endif
int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid,
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 84611d7fcfa2..aa56d3fb5da4 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -718,26 +718,6 @@ static int dsa_resume(struct device *d)
}
#endif
-/* legacy way, bypassing the bridge *****************************************/
-int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid,
- u16 flags)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_fdb_add(dp, addr, vid);
-}
-
-int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_fdb_del(dp, addr, vid);
-}
-
static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
static const struct of_device_id dsa_of_match_table[] = {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a95a55f79137..f52307296de4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -708,14 +708,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
__be16 protocol = cls->common.protocol;
- struct net *net = dev_net(dev);
struct dsa_switch *ds = dp->ds;
struct net_device *to_dev;
const struct tc_action *a;
struct dsa_port *to_dp;
int err = -EOPNOTSUPP;
LIST_HEAD(actions);
- int ifindex;
if (!ds->ops->port_mirror_add)
return err;
@@ -729,8 +727,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
struct dsa_mall_mirror_tc_entry *mirror;
- ifindex = tcf_mirred_ifindex(a);
- to_dev = __dev_get_by_index(net, ifindex);
+ to_dev = tcf_mirred_dev(a);
if (!to_dev)
return -EINVAL;
@@ -943,6 +940,26 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_rxnfc = dsa_slave_set_rxnfc,
};
+/* legacy way, bypassing the bridge *****************************************/
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_fdb_add(dp, addr, vid);
+}
+
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_fdb_del(dp, addr, vid);
+}
+
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 29608d087a7c..b93511726069 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,29 +83,52 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- /* Do not care yet about other switch chips of the fabric */
- if (ds->index != info->sw_index)
- return 0;
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_add(ds, info->port, info->addr,
- info->vid);
+ return ds->ops->port_fdb_add(ds, port, info->addr, info->vid);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- /* Do not care yet about other switch chips of the fabric */
- if (ds->index != info->sw_index)
- return 0;
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_del(ds, info->port, info->addr,
- info->vid);
+ return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
+}
+
+static int
+dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_mdb *mdb,
+ const unsigned long *bitmap)
+{
+ int port, err;
+
+ if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+ return -EOPNOTSUPP;
+
+ for_each_set_bit(port, bitmap, ds->num_ports) {
+ err = ds->ops->port_mdb_prepare(ds, port, mdb);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_mdb *mdb,
+ const unsigned long *bitmap)
+{
+ int port;
+
+ for_each_set_bit(port, bitmap, ds->num_ports)
+ ds->ops->port_mdb_add(ds, port, mdb);
}
static int dsa_switch_mdb_add(struct dsa_switch *ds,
@@ -114,7 +137,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
const struct switchdev_obj_port_mdb *mdb = info->mdb;
struct switchdev_trans *trans = info->trans;
DECLARE_BITMAP(group, ds->num_ports);
- int port, err;
+ int port;
/* Build a mask of Multicast group members */
bitmap_zero(group, ds->num_ports);
@@ -124,21 +147,10 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (dsa_is_dsa_port(ds, port))
set_bit(port, group);
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
- return -EOPNOTSUPP;
-
- for_each_set_bit(port, group, ds->num_ports) {
- err = ds->ops->port_mdb_prepare(ds, port, mdb, trans);
- if (err)
- return err;
- }
-
- return 0;
- }
+ if (switchdev_trans_ph_prepare(trans))
+ return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
- for_each_set_bit(port, group, ds->num_ports)
- ds->ops->port_mdb_add(ds, port, mdb, trans);
+ dsa_switch_mdb_add_bitmap(ds, mdb, group);
return 0;
}
@@ -157,13 +169,43 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds,
return 0;
}
+static int
+dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_vlan *vlan,
+ const unsigned long *bitmap)
+{
+ int port, err;
+
+ if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+ return -EOPNOTSUPP;
+
+ for_each_set_bit(port, bitmap, ds->num_ports) {
+ err = ds->ops->port_vlan_prepare(ds, port, vlan);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_vlan *vlan,
+ const unsigned long *bitmap)
+{
+ int port;
+
+ for_each_set_bit(port, bitmap, ds->num_ports)
+ ds->ops->port_vlan_add(ds, port, vlan);
+}
+
static int dsa_switch_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
const struct switchdev_obj_port_vlan *vlan = info->vlan;
struct switchdev_trans *trans = info->trans;
DECLARE_BITMAP(members, ds->num_ports);
- int port, err;
+ int port;
/* Build a mask of VLAN members */
bitmap_zero(members, ds->num_ports);
@@ -173,21 +215,10 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
set_bit(port, members);
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
- return -EOPNOTSUPP;
-
- for_each_set_bit(port, members, ds->num_ports) {
- err = ds->ops->port_vlan_prepare(ds, port, vlan, trans);
- if (err)
- return err;
- }
-
- return 0;
- }
+ if (switchdev_trans_ph_prepare(trans))
+ return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
- for_each_set_bit(port, members, ds->num_ports)
- ds->ops->port_vlan_add(ds, port, vlan, trans);
+ dsa_switch_vlan_add_bitmap(ds, vlan, members);
return 0;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 8475434af7d5..11535bc70743 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,10 +13,13 @@
*/
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include "dsa_priv.h"
#define MTK_HDR_LEN 4
+#define MTK_HDR_XMIT_UNTAGGED 0
+#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
@@ -25,20 +28,37 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
+ bool is_vlan_skb = true;
- if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
- return NULL;
-
- skb_push(skb, MTK_HDR_LEN);
+ /* Build the special tag after the MAC Source Address. If VLAN header
+ * is present, it's required that VLAN header and special tag is
+ * being combined. Only in this way we can allow the switch can parse
+ * the both special and VLAN tag at the same time and then look up VLAN
+ * table with VID.
+ */
+ if (!skb_vlan_tagged(skb)) {
+ if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+ return NULL;
- memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+ skb_push(skb, MTK_HDR_LEN);
+ memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+ is_vlan_skb = false;
+ }
- /* Build the tag after the MAC Source Address */
mtk_tag = skb->data + 2 * ETH_ALEN;
- mtk_tag[0] = 0;
+
+ /* Mark tag attribute on special tag insertion to notify hardware
+ * whether that's a combined special tag with 802.1Q header.
+ */
+ mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
+ MTK_HDR_XMIT_UNTAGGED;
mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
- mtk_tag[2] = 0;
- mtk_tag[3] = 0;
+
+ /* Tag control information is kept for 802.1Q */
+ if (!is_vlan_skb) {
+ mtk_tag[2] = 0;
+ mtk_tag[3] = 0;
+ }
return skb;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7d15fb0d94d..f6f58108b4c5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
+#include <linux/bootmem.h>
#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
@@ -168,6 +169,60 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
+static struct inet_listen_hashbucket *
+inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
+{
+ u32 hash;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(sock_net(sk),
+ &sk->sk_v6_rcv_saddr,
+ inet_sk(sk)->inet_num);
+ else
+#endif
+ hash = ipv4_portaddr_hash(sock_net(sk),
+ inet_sk(sk)->inet_rcv_saddr,
+ inet_sk(sk)->inet_num);
+ return inet_lhash2_bucket(h, hash);
+}
+
+static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
+{
+ struct inet_listen_hashbucket *ilb2;
+
+ if (!h->lhash2)
+ return;
+
+ ilb2 = inet_lhash2_bucket_sk(h, sk);
+
+ spin_lock(&ilb2->lock);
+ if (sk->sk_reuseport && sk->sk_family == AF_INET6)
+ hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
+ &ilb2->head);
+ else
+ hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
+ &ilb2->head);
+ ilb2->count++;
+ spin_unlock(&ilb2->lock);
+}
+
+static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
+{
+ struct inet_listen_hashbucket *ilb2;
+
+ if (!h->lhash2 ||
+ WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
+ return;
+
+ ilb2 = inet_lhash2_bucket_sk(h, sk);
+
+ spin_lock(&ilb2->lock);
+ hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
+ ilb2->count--;
+ spin_unlock(&ilb2->lock);
+}
+
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif, bool exact_dif)
@@ -207,6 +262,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
*/
/* called with rcu_read_lock() : No refcount taken on the socket */
+static struct sock *inet_lhash2_lookup(struct net *net,
+ struct inet_listen_hashbucket *ilb2,
+ struct sk_buff *skb, int doff,
+ const __be32 saddr, __be16 sport,
+ const __be32 daddr, const unsigned short hnum,
+ const int dif, const int sdif)
+{
+ bool exact_dif = inet_exact_dif_match(net, skb);
+ struct inet_connection_sock *icsk;
+ struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ u32 phash = 0;
+
+ inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ score = compute_score(sk, net, hnum, daddr,
+ dif, sdif, exact_dif);
+ if (score > hiscore) {
+ if (sk->sk_reuseport) {
+ phash = inet_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
+ }
+ result = sk;
+ hiscore = score;
+ }
+ }
+
+ return result;
+}
+
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -216,32 +305,57 @@ struct sock *__inet_lookup_listener(struct net *net,
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- int score, hiscore = 0, matches = 0, reuseport = 0;
bool exact_dif = inet_exact_dif_match(net, skb);
+ struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ unsigned int hash2;
u32 phash = 0;
+ if (ilb->count <= 10 || !hashinfo->lhash2)
+ goto port_lookup;
+
+ /* Too many sk in the ilb bucket (which is hashed by port alone).
+ * Try lhash2 (which is hashed by port and addr) instead.
+ */
+
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ result = inet_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+ if (result)
+ return result;
+
+ /* Lookup lhash2 with INADDR_ANY */
+
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ return inet_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+
+port_lookup:
sk_for_each_rcu(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
- matches = 1;
}
result = sk;
hiscore = score;
- } else if (score == hiscore && reuseport) {
- matches++;
- if (reciprocal_scale(phash, matches) == 0)
- result = sk;
- phash = next_pseudo_random32(phash);
}
}
return result;
@@ -483,6 +597,8 @@ int __inet_hash(struct sock *sk, struct sock *osk)
hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
else
hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
@@ -509,28 +625,35 @@ EXPORT_SYMBOL_GPL(inet_hash);
void inet_unhash(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+ struct inet_listen_hashbucket *ilb;
spinlock_t *lock;
bool listener = false;
- int done;
if (sk_unhashed(sk))
return;
if (sk->sk_state == TCP_LISTEN) {
- lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ lock = &ilb->lock;
listener = true;
} else {
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
}
spin_lock_bh(lock);
+ if (sk_unhashed(sk))
+ goto unlock;
+
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
- if (listener)
- done = __sk_del_node_init(sk);
- else
- done = __sk_nulls_del_node_init_rcu(sk);
- if (done)
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ if (listener) {
+ inet_unhash2(hashinfo, sk);
+ __sk_del_node_init(sk);
+ ilb->count--;
+ } else {
+ __sk_nulls_del_node_init_rcu(sk);
+ }
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+unlock:
spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);
@@ -665,10 +788,37 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_HEAD(&h->listening_hash[i].head);
+ h->listening_hash[i].count = 0;
}
+
+ h->lhash2 = NULL;
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
+void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+ unsigned long numentries, int scale,
+ unsigned long low_limit,
+ unsigned long high_limit)
+{
+ unsigned int i;
+
+ h->lhash2 = alloc_large_system_hash(name,
+ sizeof(*h->lhash2),
+ numentries,
+ scale,
+ 0,
+ NULL,
+ &h->lhash2_mask,
+ low_limit,
+ high_limit);
+
+ for (i = 0; i <= h->lhash2_mask; i++) {
+ spin_lock_init(&h->lhash2[i].lock);
+ INIT_HLIST_HEAD(&h->lhash2[i].head);
+ h->lhash2[i].count = 0;
+ }
+}
+
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
unsigned int locksz = sizeof(spinlock_t);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index b563e0c46bac..277ff69a312d 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -97,7 +97,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
* Essentially we whip up a timewait bucket, copy the relevant info into it
* from the SK, and mess with hash chains and list linkage.
*/
-void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
struct inet_hashinfo *hashinfo)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -119,18 +119,6 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
- /*
- * Step 2: Hash TW into tcp ehash chain.
- * Notes :
- * - tw_refcnt is set to 4 because :
- * - We have one reference from bhash chain.
- * - We have one reference from ehash chain.
- * - We have one reference from timer.
- * - One reference for ourself (our caller will release it).
- * We can use atomic_set() because prior spin_lock()/spin_unlock()
- * committed into memory all tw fields.
- */
- refcount_set(&tw->tw_refcnt, 4);
inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
@@ -138,8 +126,19 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_unlock(lock);
+
+ /* tw_refcnt is set to 3 because we have :
+ * - one reference for bhash chain.
+ * - one reference for ehash chain.
+ * - one reference for timer.
+ * We can use atomic_set() because prior spin_lock()/spin_unlock()
+ * committed into memory all tw fields.
+ * Also note that after this point, we lost our implicit reference
+ * so we are not allowed to use tw anymore.
+ */
+ refcount_set(&tw->tw_refcnt, 3);
}
-EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
+EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
static void tw_timer_handler(struct timer_list *t)
{
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9c1735632c8c..9a80d84fc182 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -114,7 +114,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index, bool truncate);
+ __be32 id, u32 index,
+ bool truncate, bool is_ipv4);
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
@@ -255,34 +256,41 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
{
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *pkt_md;
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
- struct erspanhdr *ershdr;
const struct iphdr *iph;
- __be32 index;
+ int ver;
int len;
itn = net_generic(net, erspan_net_id);
len = gre_hdr_len + sizeof(*ershdr);
+ /* Check based hdr len */
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
iph = ip_hdr(skb);
- ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
/* The original GRE header does not have key field,
* Use ERSPAN 10-bit session ID as key.
*/
tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
- index = ershdr->md.index;
+ pkt_md = (struct erspan_metadata *)(ershdr + 1);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
+ len = gre_hdr_len + erspan_hdr_len(ver);
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
+
if (__iptunnel_pull_header(skb,
- gre_hdr_len + sizeof(*ershdr),
+ len,
htons(ETH_P_TEB),
false, false) < 0)
goto drop;
@@ -306,12 +314,27 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!md)
return PACKET_REJECT;
- md->index = index;
+ memcpy(md, pkt_md, sizeof(*md));
+ md->version = ver;
+
info = &tun_dst->u.tun_info;
info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
info->options_len = sizeof(*md);
} else {
- tunnel->index = ntohl(index);
+ tunnel->erspan_ver = ver;
+ if (ver == 1) {
+ tunnel->index = ntohl(pkt_md->u.index);
+ } else {
+ u16 md2_flags;
+ u16 dir, hwid;
+
+ md2_flags = ntohs(pkt_md->u.md2.flags);
+ dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+ tunnel->dir = dir;
+ tunnel->hwid = hwid;
+ }
+
}
skb_reset_mac_header(skb);
@@ -405,7 +428,8 @@ static int gre_rcv(struct sk_buff *skb)
if (hdr_len < 0)
goto drop;
- if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
}
@@ -560,6 +584,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
bool truncate = false;
struct flowi4 fl;
int tunnel_hlen;
+ int version;
__be16 df;
tun_info = skb_tunnel_info(skb);
@@ -568,9 +593,13 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
goto err_free_skb;
key = &tun_info->key;
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto err_free_rt;
/* ERSPAN has fixed 8 byte GRE header */
- tunnel_hlen = 8 + sizeof(struct erspanhdr);
+ version = md->version;
+ tunnel_hlen = 8 + erspan_hdr_len(version);
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
if (!rt)
@@ -584,12 +613,23 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
truncate = true;
}
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
- goto err_free_rt;
+ if (version == 1) {
+ erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ ntohl(md->u.index), truncate, true);
+ } else if (version == 2) {
+ u16 md2_flags;
+ u8 direction;
+ u16 hwid;
- erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
- ntohl(md->index), truncate);
+ md2_flags = ntohs(md->u.md2.flags);
+ direction = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+
+ erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id),
+ direction, hwid, truncate, true);
+ } else {
+ goto err_free_rt;
+ }
gre_build_header(skb, 8, TUNNEL_SEQ,
htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
@@ -668,52 +708,6 @@ free_skb:
return NETDEV_TX_OK;
}
-static inline u8 tos_to_cos(u8 tos)
-{
- u8 dscp, cos;
-
- dscp = tos >> 2;
- cos = dscp >> 3;
- return cos;
-}
-
-static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index, bool truncate)
-{
- struct iphdr *iphdr = ip_hdr(skb);
- struct ethhdr *eth = eth_hdr(skb);
- enum erspan_encap_type enc_type;
- struct erspanhdr *ershdr;
- struct qtag_prefix {
- __be16 eth_type;
- __be16 tci;
- } *qp;
- u16 vlan_tci = 0;
-
- enc_type = ERSPAN_ENCAP_NOVLAN;
-
- /* If mirrored packet has vlan tag, extract tci and
- * perserve vlan header in the mirrored frame.
- */
- if (eth->h_proto == htons(ETH_P_8021Q)) {
- qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
- vlan_tci = ntohs(qp->tci);
- enc_type = ERSPAN_ENCAP_INFRAME;
- }
-
- skb_push(skb, sizeof(*ershdr));
- ershdr = (struct erspanhdr *)skb->data;
- memset(ershdr, 0, sizeof(*ershdr));
-
- ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- (ERSPAN_VERSION << VER_OFFSET));
- ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
- ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
- (enc_type << EN_OFFSET & EN_MASK) |
- ((truncate << T_OFFSET) & T_MASK));
- ershdr->md.index = htonl(index & INDEX_MASK);
-}
-
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -737,7 +731,14 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
}
/* Push ERSPAN header */
- erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+ if (tunnel->erspan_ver == 1)
+ erspan_build_header(skb, tunnel->parms.o_key, tunnel->index,
+ truncate, true);
+ else
+ erspan_build_header_v2(skb, tunnel->parms.o_key,
+ tunnel->dir, tunnel->hwid,
+ truncate, true);
+
tunnel->parms.o_flags &= ~TUNNEL_KEY;
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
return NETDEV_TX_OK;
@@ -1209,13 +1210,32 @@ static int ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
- if (data[IFLA_GRE_ERSPAN_INDEX]) {
- t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ if (data[IFLA_GRE_ERSPAN_VER]) {
+ t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
- if (t->index & ~INDEX_MASK)
+ if (t->erspan_ver != 1 && t->erspan_ver != 2)
return -EINVAL;
}
+ if (t->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ if (t->index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+ } else if (t->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR]) {
+ t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
+ return -EINVAL;
+ }
+ if (data[IFLA_GRE_ERSPAN_HWID]) {
+ t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1282,7 +1302,7 @@ static int erspan_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 8;
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
- sizeof(struct erspanhdr);
+ erspan_hdr_len(tunnel->erspan_ver);
t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
@@ -1412,6 +1432,12 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_GRE_ERSPAN_INDEX */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_VER */
+ nla_total_size(1) +
+ /* IFLA_GRE_ERSPAN_DIR */
+ nla_total_size(1) +
+ /* IFLA_GRE_ERSPAN_HWID */
+ nla_total_size(2) +
0;
}
@@ -1454,9 +1480,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
- if (t->index)
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
goto nla_put_failure;
+ } else if (t->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
return 0;
@@ -1492,6 +1527,9 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 43b69af242e1..f0ed031f3594 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1106,7 +1106,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+ __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f08eebe60446..c470fec9062f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3578,6 +3578,9 @@ void __init tcp_init(void)
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
inet_hashinfo_init(&tcp_hashinfo);
+ inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
+ thash_entries, 21, /* one slot per 2 MB*/
+ 0, 64 * 1024);
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 78c192ee03a4..018a48477355 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -379,18 +379,9 @@ fastopen:
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
- unsigned long last_syn_loss = 0;
const struct dst_entry *dst;
- int syn_loss = 0;
- tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
-
- /* Recurring FO SYN losses: no cookie or data in SYN */
- if (syn_loss > 1 &&
- time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
- cookie->len = -1;
- return false;
- }
+ tcp_fastopen_cache_get(sk, mss, cookie);
/* Firewall blackhole issue check */
if (tcp_fastopen_active_should_disable(sk)) {
@@ -448,6 +439,8 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
* following circumstances:
* 1. client side TFO socket receives out of order FIN
* 2. client side TFO socket receives out of order RST
+ * 3. client side TFO socket has timed out three times consecutively during
+ * or after handshake
* We disable active side TFO globally for 1hr at first. Then if it
* happens again, we disable it for 2h, then 4h, 8h, ...
* And we reset the timeout back to 1hr when we see a successful active
@@ -524,3 +517,20 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
dst_release(dst);
}
}
+
+void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired)
+{
+ u32 timeouts = inet_csk(sk)->icsk_retransmits;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* Broken middle-boxes may black-hole Fast Open connection during or
+ * even after the handshake. Be extremely conservative and pause
+ * Fast Open globally after hitting the third consecutive timeout or
+ * exceeding the configured timeout limit.
+ */
+ if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) &&
+ (timeouts == 2 || (timeouts < 2 && expired))) {
+ tcp_fastopen_active_disable(sk);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ }
+}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 45f750e85714..4d55c4b338ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -578,8 +578,8 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 copied;
int time;
- int copied;
tcp_mstamp_refresh(tp);
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
@@ -602,38 +602,31 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvwin, rcvmem, rcvbuf;
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
*/
- rcvwin = (copied << 1) + 16 * tp->advmss;
+ rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
- /* If rate increased by 25%,
- * assume slow start, rcvwin = 3 * copied
- * If rate increased by 50%,
- * assume sender can use 2x growth, rcvwin = 4 * copied
- */
- if (copied >=
- tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
- if (copied >=
- tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
- rcvwin <<= 1;
- else
- rcvwin += (rcvwin >> 1);
- }
+ /* Accommodate for sender rate increase (eg. slow start) */
+ grow = rcvwin * (copied - tp->rcvq_space.space);
+ do_div(grow, tp->rcvq_space.space);
+ rcvwin += (grow << 1);
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ do_div(rcvwin, tp->advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
/* Make the window clamp follow along. */
- tp->window_clamp = rcvwin;
+ tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
}
}
tp->rcvq_space.space = copied;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7097f92d16e5..759e6bc8327b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -546,8 +546,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
static DEFINE_SEQLOCK(fastopen_seqlock);
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- struct tcp_fastopen_cookie *cookie,
- int *syn_loss, unsigned long *last_syn_loss)
+ struct tcp_fastopen_cookie *cookie)
{
struct tcp_metrics_block *tm;
@@ -564,8 +563,6 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
*cookie = tfom->cookie;
if (cookie->len <= 0 && tfom->try_exp == 1)
cookie->exp = true;
- *syn_loss = tfom->syn_loss;
- *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
} while (read_seqretry(&fastopen_seqlock, seq));
}
rcu_read_unlock();
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b079b619b60c..a8384b0c11f8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -316,9 +316,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
*/
local_bh_disable();
inet_twsk_schedule(tw, timeo);
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
- inet_twsk_put(tw);
+ /* Linkage updates.
+ * Note that access to tw after this point is illegal.
+ */
+ inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a4d214c7b506..04be9f833927 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2414,15 +2414,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections
- * in Open state, that are either limited by cwnd or application.
+ * not in loss recovery, that are either limited by cwnd or application.
*/
if ((early_retrans != 3 && early_retrans != 4) ||
!tp->packets_out || !tcp_is_sack(tp) ||
- icsk->icsk_ca_state != TCP_CA_Open)
- return false;
-
- if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
- !tcp_write_queue_empty(sk))
+ (icsk->icsk_ca_state != TCP_CA_Open &&
+ icsk->icsk_ca_state != TCP_CA_CWR))
return false;
/* Probe timeout is 2*rtt. Add minimum RTO to account
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 968fda198376..6db3124cdbda 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -183,11 +183,6 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits) {
dst_negative_advice(sk);
- if (tp->syn_fastopen || tp->syn_data)
- tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (tp->syn_data && icsk->icsk_retransmits == 1)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVEFAIL);
} else if (!tp->syn_data && !tp->syn_fastopen) {
sk_rethink_txhash(sk);
}
@@ -195,17 +190,6 @@ static int tcp_write_timeout(struct sock *sk)
expired = icsk->icsk_retransmits >= retry_until;
} else {
if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
- /* Some middle-boxes may black-hole Fast Open _after_
- * the handshake. Therefore we conservatively disable
- * Fast Open on this path on recurring timeouts after
- * successful Fast Open.
- */
- if (tp->syn_data_acked) {
- tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVEFAIL);
- }
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
@@ -228,6 +212,7 @@ static int tcp_write_timeout(struct sock *sk)
expired = retransmits_timed_out(sk, retry_until,
icsk->icsk_user_timeout);
}
+ tcp_fastopen_active_detect_blackhole(sk, expired);
if (expired) {
/* Has it gone just too far? */
tcp_write_err(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e4ff25c947c5..e9c0d1e1772e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -357,18 +357,12 @@ fail:
}
EXPORT_SYMBOL(udp_lib_get_port);
-static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
- unsigned int port)
-{
- return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
-}
-
int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
- udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
+ ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
unsigned int hash2_partial =
- udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+ ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -445,7 +439,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
result = NULL;
@@ -454,23 +448,16 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
badness = score;
result = sk;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -488,11 +475,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp_lib_exact_dif_match(net, skb);
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
if (hslot->count > 10) {
- hash2 = udp4_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
@@ -503,7 +490,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
exact_dif, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
- hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
@@ -526,23 +513,16 @@ begin:
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -1775,7 +1755,7 @@ EXPORT_SYMBOL(udp_lib_rehash);
static void udp_v4_rehash(struct sock *sk)
{
- u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+ u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
inet_sk(sk)->inet_rcv_saddr,
inet_sk(sk)->inet_num);
udp_lib_rehash(sk, new_hash);
@@ -1966,9 +1946,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct sk_buff *nskb;
if (use_hash2) {
- hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
+ hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
udptable->mask;
- hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -2200,7 +2180,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+ unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e6265e2c274e..7d885a44dc9d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -62,7 +62,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- top_iph->ttl = ip4_dst_hoplimit(dst->child);
+ top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f49bd7897e95..ed06b1190f05 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6595,27 +6595,45 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
- 0);
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
+ NULL, inet6_dump_ifinfo, 0);
if (err < 0)
goto errout;
- /* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
- inet6_dump_ifmcaddr, 0);
- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
- inet6_dump_ifacaddr, 0);
- __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
-
- ipv6_addr_label_rtnl_register();
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
+ inet6_rtm_newaddr, NULL, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
+ inet6_rtm_deladdr, NULL, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
+ inet6_rtm_getaddr, inet6_dump_ifaddr,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
+ NULL, inet6_dump_ifmcaddr, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
+ NULL, inet6_dump_ifacaddr, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
+ inet6_netconf_get_devconf,
+ inet6_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (err < 0)
+ goto errout;
+ err = ipv6_addr_label_rtnl_register();
+ if (err < 0)
+ goto errout;
return 0;
errout:
+ rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 00e1f8ee08f8..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -547,13 +547,22 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
-void __init ipv6_addr_label_rtnl_register(void)
+int __init ipv6_addr_label_rtnl_register(void)
{
- __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
-}
+ int ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
+ ip6addrlbl_newdel,
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
+ return ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
+ ip6addrlbl_newdel,
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
+ return ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
+ ip6addrlbl_get,
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+ return ret;
+}
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 6eb5e68f112a..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct ila_map *ila;
int ret;
- ret = rhashtable_walk_start(rhiter);
- if (ret && ret != -EAGAIN)
- goto done;
+ rhashtable_walk_start(rhiter);
for (;;) {
ila = rhashtable_walk_next(rhiter);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b01858f5deb1..2febe26de6a1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
/* called with rcu_read_lock() */
+static struct sock *inet6_lhash2_lookup(struct net *net,
+ struct inet_listen_hashbucket *ilb2,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport, const struct in6_addr *daddr,
+ const unsigned short hnum, const int dif, const int sdif)
+{
+ bool exact_dif = inet6_exact_dif_match(net, skb);
+ struct inet_connection_sock *icsk;
+ struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ u32 phash = 0;
+
+ inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ score = compute_score(sk, net, hnum, daddr, dif, sdif,
+ exact_dif);
+ if (score > hiscore) {
+ if (sk->sk_reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
+ }
+ result = sk;
+ hiscore = score;
+ }
+ }
+
+ return result;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -134,31 +168,56 @@ struct sock *inet6_lookup_listener(struct net *net,
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- int score, hiscore = 0, matches = 0, reuseport = 0;
bool exact_dif = inet6_exact_dif_match(net, skb);
+ struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ unsigned int hash2;
u32 phash = 0;
+ if (ilb->count <= 10 || !hashinfo->lhash2)
+ goto port_lookup;
+
+ /* Too many sk in the ilb bucket (which is hashed by port alone).
+ * Try lhash2 (which is hashed by port and addr) instead.
+ */
+
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+ if (result)
+ return result;
+
+ /* Lookup lhash2 with in6addr_any */
+
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ return inet6_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+
+port_lookup:
sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
- matches = 1;
}
result = sk;
hiscore = score;
- } else if (score == hiscore && reuseport) {
- matches++;
- if (reciprocal_scale(phash, matches) == 0)
- result = sk;
- phash = next_pseudo_random32(phash);
}
}
return result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..a64d559fa513 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -893,7 +893,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
for (iter = leaf; iter;
- iter = rcu_dereference_protected(iter->dst.rt6_next,
+ iter = rcu_dereference_protected(iter->rt6_next,
lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
@@ -950,7 +950,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
break;
next_iter:
- ins = &iter->dst.rt6_next;
+ ins = &iter->rt6_next;
}
if (fallback_ins && !found) {
@@ -979,7 +979,7 @@ next_iter:
&sibling->rt6i_siblings);
break;
}
- sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ sibling = rcu_dereference_protected(sibling->rt6_next,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
@@ -1009,7 +1009,7 @@ add:
if (err)
return err;
- rcu_assign_pointer(rt->dst.rt6_next, iter);
+ rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
@@ -1040,7 +1040,7 @@ add:
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
- rt->dst.rt6_next = iter->dst.rt6_next;
+ rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1059,14 +1059,14 @@ add:
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
- ins = &rt->dst.rt6_next;
+ ins = &rt->rt6_next;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
if (rt6_qualify_for_ecmp(iter)) {
- *ins = iter->dst.rt6_next;
+ *ins = iter->rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1075,7 +1075,7 @@ add:
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
- ins = &iter->dst.rt6_next;
+ ins = &iter->rt6_next;
}
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1644,7 +1644,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
/* Unlink it */
- *rtp = rt->dst.rt6_next;
+ *rtp = rt->rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1672,7 +1672,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ w->leaf = rcu_dereference_protected(rt->rt6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
@@ -1731,7 +1731,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
fib6_del_route(table, fn, rtp, info);
return 0;
}
- rtp_next = &cur->dst.rt6_next;
+ rtp_next = &cur->rt6_next;
}
return -ENOENT;
}
@@ -2142,8 +2142,8 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- 0);
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
+ inet6_dump_fib, 0);
if (ret)
goto out_unregister_subsys;
@@ -2208,7 +2208,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
do {
iter->w.leaf = rcu_dereference_protected(
- iter->w.leaf->dst.rt6_next,
+ iter->w.leaf->rt6_next,
lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
@@ -2274,7 +2274,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
+ n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
if (n) {
++*pos;
return n;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..5c9c65f1d5c2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,8 @@
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
#include <net/gre.h>
+#include <net/erspan.h>
+#include <net/dst_metadata.h>
static bool log_ecn_error = true;
@@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly;
struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
+ struct ip6_tnl __rcu *collect_md_tun;
struct net_device *fb_tunnel_dev;
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -121,7 +125,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
unsigned int h1 = HASH_KEY(key);
struct ip6_tnl *t, *cand = NULL;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ int dev_type = (gre_proto == htons(ETH_P_TEB) ||
+ gre_proto == htons(ETH_P_ERSPAN)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
@@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
+ t = rcu_dereference(ign->collect_md_tun);
+ if (t && t->dev->flags & IFF_UP)
+ return t;
+
dev = ign->fb_tunnel_dev;
if (dev->flags & IFF_UP)
return netdev_priv(dev);
@@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, t);
+
rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, NULL);
+
for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -460,7 +475,108 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ __be64 tun_id;
+ __be16 flags;
+
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ } else {
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
+
+ return PACKET_RCVD;
+ }
+
+ return PACKET_REJECT;
+}
+
+static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
+ struct tnl_ptk_info *tpi)
+{
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *pkt_md;
+ const struct ipv6hdr *ipv6h;
+ struct ip6_tnl *tunnel;
+ u8 ver;
+
+ ipv6h = ipv6_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
+ ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
+ tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ pkt_md = (struct erspan_metadata *)(ershdr + 1);
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+ tpi->proto);
+ if (tunnel) {
+ int len = erspan_hdr_len(ver);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
+
+ if (__iptunnel_pull_header(skb, len,
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ return PACKET_REJECT;
+
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
+ sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ info = &tun_dst->u.tun_info;
+ md = ip_tunnel_info_opts(info);
+ if (!md)
+ return PACKET_REJECT;
+
+ memcpy(md, pkt_md, sizeof(*md));
+ md->version = ver;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+
+ } else {
+ tunnel->parms.erspan_ver = ver;
+
+ if (ver == 1) {
+ tunnel->parms.index = ntohl(pkt_md->u.index);
+ } else {
+ u16 md2_flags;
+ u16 dir, hwid;
+
+ md2_flags = ntohs(pkt_md->u.md2.flags);
+ dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+ tunnel->parms.dir = dir;
+ tunnel->parms.hwid = hwid;
+ }
+
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
return PACKET_RCVD;
}
@@ -481,6 +597,13 @@ static int gre_rcv(struct sk_buff *skb)
if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
goto drop;
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
+ if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+ return 0;
+ goto drop;
+ }
+
if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
@@ -496,6 +619,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
+static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ *encap_limit = t->parms.encap_limit;
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = ipv4_get_dsfield(iph);
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+}
+
+static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+ __u16 offset;
+
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ *encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ *encap_limit = t->parms.encap_limit;
+ }
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = ipv6_get_dsfield(ipv6h);
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6->flowlabel |= ip6_flowlabel(ipv6h);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ return 0;
+}
+
static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
struct net_device *dev, __u8 dsfield,
struct flowi6 *fl6, int encap_limit,
@@ -517,8 +712,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
- protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+
+ if (tunnel->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ __be16 flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_GRE;
+ fl6->daddr = key->u.ipv6.dst;
+ fl6->flowlabel = key->label;
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tunnel->tun_hlen = gre_calc_hlen(flags);
+
+ gre_build_header(skb, tunnel->tun_hlen,
+ flags, protocol,
+ tunnel_id_to_key32(tun_info->key.tun_id), 0);
+
+ } else {
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key,
+ htonl(tunnel->o_seqno));
+ }
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
@@ -527,30 +752,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- const struct iphdr *iph = ip_hdr(skb);
int encap_limit = -1;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv4_get_dsfield(iph);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ if (!t->parms.collect_md)
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -574,46 +786,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int encap_limit = -1;
- __u16 offset;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
return -1;
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
- ipv6h = ipv6_hdr(skb);
-
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
- return -1;
- }
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv6_get_dsfield(ipv6h);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= ip6_flowlabel(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ if (!t->parms.collect_md &&
+ prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
+ return -1;
if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
return -1;
@@ -660,7 +843,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ if (!t->parms.collect_md)
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -705,6 +889,141 @@ tx_err:
return NETDEV_TX_OK;
}
+static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device_stats *stats;
+ bool truncate = false;
+ int encap_limit = -1;
+ __u8 dsfield = false;
+ struct flowi6 fl6;
+ int err = -EINVAL;
+ __u32 mtu;
+
+ if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
+ goto tx_err;
+
+ if (gre_handle_offloads(skb, false))
+ goto tx_err;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ truncate = true;
+ }
+
+ t->parms.o_flags &= ~TUNNEL_KEY;
+ IPCB(skb)->flags = 0;
+
+ /* For collect_md mode, derive fl6 from the tunnel key,
+ * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
+ */
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_GRE;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto tx_err;
+
+ if (md->version == 1) {
+ erspan_build_header(skb,
+ tunnel_id_to_key32(key->tun_id),
+ ntohl(md->u.index), truncate,
+ false);
+ } else if (md->version == 2) {
+ u16 md2_flags;
+ u16 dir, hwid;
+
+ md2_flags = ntohs(md->u.md2.flags);
+ dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+
+ erspan_build_header_v2(skb,
+ tunnel_id_to_key32(key->tun_id),
+ dir, hwid, truncate,
+ false);
+ }
+ } else {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
+ break;
+ case htons(ETH_P_IPV6):
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ goto tx_err;
+ if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
+ &dsfield, &encap_limit))
+ goto tx_err;
+ break;
+ default:
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ break;
+ }
+
+ if (t->parms.erspan_ver == 1)
+ erspan_build_header(skb, t->parms.o_key,
+ t->parms.index,
+ truncate, false);
+ else
+ erspan_build_header_v2(skb, t->parms.o_key,
+ t->parms.dir,
+ t->parms.hwid,
+ truncate, false);
+ fl6.daddr = t->parms.raddr;
+ }
+
+ /* Push GRE header. */
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+ htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE) {
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ }
+
+ goto tx_err;
+ }
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats = &t->dev->stats;
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
struct net_device *dev = t->dev;
@@ -1048,6 +1367,11 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ if (tunnel->parms.collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
+
return 0;
}
@@ -1062,6 +1386,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev);
+ if (tunnel->parms.collect_md)
+ return 0;
+
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
@@ -1084,7 +1411,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
dev_hold(dev);
}
-
static struct inet6_protocol ip6gre_protocol __read_mostly = {
.handler = gre_rcv,
.err_handler = ip6gre_err,
@@ -1099,7 +1425,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6gre_link_ops ||
- dev->rtnl_link_ops == &ip6gre_tap_ops)
+ dev->rtnl_link_ops == &ip6gre_tap_ops ||
+ dev->rtnl_link_ops == &ip6erspan_tap_ops)
unregister_netdevice_queue(dev, head);
for (prio = 0; prio < 4; prio++) {
@@ -1221,6 +1548,70 @@ out:
return ip6gre_tunnel_validate(tb, data, extack);
}
+static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ __be16 flags = 0;
+ int ret, ver = 0;
+
+ if (!data)
+ return 0;
+
+ ret = ip6gre_tap_validate(tb, data, extack);
+ if (ret)
+ return ret;
+
+ /* ERSPAN should only have GRE sequence and key flag */
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+ /* ERSPAN Session ID only has 10-bit. Since we reuse
+ * 32-bit key field as ID, check it's range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_ERSPAN_VER]) {
+ ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+ if (ver != 1 && ver != 2)
+ return -EINVAL;
+ }
+
+ if (ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+ } else if (ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR]) {
+ u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+
+ if (dir & ~(DIR_MASK >> DIR_OFFSET))
+ return -EINVAL;
+ }
+
+ if (data[IFLA_GRE_ERSPAN_HWID]) {
+ u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+
+ if (hwid & ~(HWID_MASK >> HWID_OFFSET))
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
@@ -1267,6 +1658,22 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_FWMARK])
parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
+ if (data[IFLA_GRE_COLLECT_METADATA])
+ parms->collect_md = true;
+
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -1303,6 +1710,59 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
NETIF_F_HIGHDMA | \
NETIF_F_HW_CSUM)
+static int ip6erspan_tap_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+ int t_hlen;
+ int ret;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
+ strcpy(tunnel->parms.name, dev->name);
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
+ tunnel->tun_hlen = 8;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ erspan_hdr_len(tunnel->parms.erspan_ver);
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->mtu -= ETH_HLEN;
+ if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ tunnel = netdev_priv(dev);
+ ip6gre_tnl_link_config(tunnel, 1);
+
+ return 0;
+}
+
+static const struct net_device_ops ip6erspan_netdev_ops = {
+ .ndo_init = ip6erspan_tap_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6erspan_tunnel_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_iflink = ip6_tnl_get_iflink,
+};
+
static void ip6gre_tap_setup(struct net_device *dev)
{
@@ -1372,8 +1832,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
ip6gre_netlink_parms(data, &nt->parms);
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
@@ -1492,8 +1957,12 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
0;
}
@@ -1515,7 +1984,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
+ nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1528,6 +1998,24 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (p->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+ goto nla_put_failure;
+
+ if (p->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ goto nla_put_failure;
+ } else if (p->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
@@ -1550,9 +2038,28 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
};
+static void ip6erspan_tap_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+
+ dev->netdev_ops = &ip6erspan_netdev_ops;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip6gre_dev_free;
+
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
+}
+
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.kind = "ip6gre",
.maxtype = IFLA_GRE_MAX,
@@ -1582,6 +2089,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
+ .kind = "ip6erspan",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6erspan_tap_setup,
+ .validate = ip6erspan_tap_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+ .get_link_net = ip6_tnl_get_link_net,
+};
+
/*
* And now the modules code and kernel interface.
*/
@@ -1610,9 +2131,15 @@ static int __init ip6gre_init(void)
if (err < 0)
goto tap_ops_failed;
+ err = rtnl_link_register(&ip6erspan_tap_ops);
+ if (err < 0)
+ goto erspan_link_failed;
+
out:
return err;
+erspan_link_failed:
+ rtnl_link_unregister(&ip6gre_tap_ops);
tap_ops_failed:
rtnl_link_unregister(&ip6gre_link_ops);
rtnl_link_failed:
@@ -1626,6 +2153,7 @@ static void __exit ip6gre_fini(void)
{
rtnl_link_unregister(&ip6gre_tap_ops);
rtnl_link_unregister(&ip6gre_link_ops);
+ rtnl_link_unregister(&ip6erspan_tap_ops);
inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
unregister_pernet_device(&ip6gre_net_ops);
}
@@ -1637,4 +2165,5 @@ MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
MODULE_ALIAS_RTNL_LINK("ip6gretap");
+MODULE_ALIAS_RTNL_LINK("ip6erspan");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..176d74fb3b4d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1201,13 +1201,13 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
rt->dst.dev->mtu : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ rt->dst.dev->mtu : dst_mtu(xfrm_dst_path(&rt->dst));
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
cork->base.fragsize = mtu;
- if (dst_allfrag(rt->dst.path))
+ if (dst_allfrag(xfrm_dst_path(&rt->dst)))
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index db84f523656d..6ff2f21ae3fc 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -861,7 +861,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
struct metadata_dst *tun_dst,
bool log_ecn_err)
{
- return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
log_ecn_err);
}
EXPORT_SYMBOL(ip6_tnl_rcv);
@@ -979,6 +979,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
int ret = 0;
struct net *net = t->net;
+ if (t->parms.collect_md)
+ return 1;
+
if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
(ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a2e1a864eb46..890f9bda06a4 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1425,10 +1425,13 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
- ip6mr_rtm_dumproute, 0);
- return 0;
+ err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
+ NULL, ip6mr_rtm_dumproute, 0);
+ if (err == 0)
+ return 0;
+
#ifdef CONFIG_IPV6_PIMSM_V2
+ inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..b3f4d19b3ca5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
- return dst_metrics_write_ptr(rt->dst.from);
+ return dst_metrics_write_ptr(&rt->from->dst);
}
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct rt6_exception_bucket *bucket;
- struct dst_entry *from = dst->from;
+ struct rt6_info *from = rt->from;
struct inet6_dev *idev;
dst_destroy_metrics_generic(dst);
@@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
kfree(bucket);
}
- dst->from = NULL;
- dst_release(from);
+ rt->from = NULL;
+ dst_release(&from->dst);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt)
if (rt->rt6i_flags & RTF_EXPIRES) {
if (time_after(jiffies, rt->dst.expires))
return true;
- } else if (rt->dst.from) {
+ } else if (rt->from) {
return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
- rt6_check_expired((struct rt6_info *)rt->dst.from);
+ rt6_check_expired(rt->from);
}
return false;
}
@@ -502,7 +502,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (!oif && ipv6_addr_any(saddr))
goto out;
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
struct net_device *dev = sprt->dst.dev;
if (oif) {
@@ -721,7 +721,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -731,7 +731,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
}
for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->dst.rt6_next)) {
+ rt = rcu_dereference(rt->rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -743,7 +743,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
+ for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -781,7 +781,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
+ struct rt6_info *next = rcu_dereference(rt0->rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -1054,7 +1054,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
*/
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = (struct rt6_info *)ort->dst.from;
+ ort = ort->from;
rcu_read_lock();
dev = ip6_rt_get_dev_rcu(ort);
@@ -1274,7 +1274,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
/* ort can't be a cache or pcpu route */
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = (struct rt6_info *)ort->dst.from;
+ ort = ort->from;
WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
spin_lock_bh(&rt6_exception_lock);
@@ -1415,8 +1415,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
/* Remove the passed in cached rt from the hash table that contains it */
int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_info *from = (struct rt6_info *)rt->dst.from;
struct rt6_exception_bucket *bucket;
+ struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
int err;
@@ -1460,8 +1460,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
*/
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
- struct rt6_info *from = (struct rt6_info *)rt->dst.from;
struct rt6_exception_bucket *bucket;
+ struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
@@ -1929,9 +1929,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
- if (rt->dst.from &&
- dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
- dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+ if (rt->from &&
+ dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
}
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
@@ -1951,7 +1951,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
if (!__rt6_check_expired(rt) &&
rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
- rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+ rt6_check(rt->from, cookie))
return &rt->dst;
else
return NULL;
@@ -1971,7 +1971,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt6_dst_from_metrics_check(rt);
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
return rt6_dst_from_check(rt, cookie);
else
return rt6_check(rt, cookie);
@@ -3055,11 +3055,11 @@ out:
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
- BUG_ON(from->dst.from);
+ BUG_ON(from->from);
rt->rt6i_flags &= ~RTF_EXPIRES;
dst_hold(&from->dst);
- rt->dst.from = &from->dst;
+ rt->from = from;
dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
@@ -4596,8 +4596,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_null_entry)
goto out_ip6_dst_entries;
- net->ipv6.ip6_null_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
ip6_template_metrics, true);
@@ -4609,8 +4607,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_prohibit_entry)
goto out_ip6_null_entry;
- net->ipv6.ip6_prohibit_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
ip6_template_metrics, true);
@@ -4620,8 +4616,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_blk_hole_entry)
goto out_ip6_prohibit_entry;
- net->ipv6.ip6_blk_hole_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
@@ -4778,11 +4772,20 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED))
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
+ inet6_rtm_newroute, NULL, 0);
+ if (ret < 0)
+ goto out_register_late_subsys;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
+ inet6_rtm_delroute, NULL, 0);
+ if (ret < 0)
+ goto out_register_late_subsys;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
+ inet6_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
goto out_register_late_subsys;
ret = register_netdevice_notifier(&ip6_route_dev_notifier);
@@ -4800,6 +4803,7 @@ out:
return ret;
out_register_late_subsys:
+ rtnl_unregister_all(PF_INET6);
unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
fib6_rules_cleanup();
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c81407770956..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
struct seg6_hmac_info *hinfo;
int ret;
- ret = rhashtable_walk_start(iter);
- if (ret && ret != -EAGAIN)
- goto done;
+ rhashtable_walk_start(iter);
for (;;) {
hinfo = rhashtable_walk_next(iter);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f30fa313bf2..eecf9f0faf29 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-static u32 udp6_portaddr_hash(const struct net *net,
- const struct in6_addr *addr6,
- unsigned int port)
-{
- unsigned int hash, mix = net_hash_mix(net);
-
- if (ipv6_addr_any(addr6))
- hash = jhash_1word(0, mix);
- else if (ipv6_addr_v4mapped(addr6))
- hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
- else
- hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
-
- return hash ^ port;
-}
-
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
- udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+ ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
unsigned int hash2_partial =
- udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
+ ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
static void udp_v6_rehash(struct sock *sk)
{
- u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+ u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
@@ -184,7 +168,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
struct udp_hslot *hslot2, struct sk_buff *skb)
{
struct sock *sk, *result;
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
result = NULL;
@@ -193,8 +177,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
@@ -202,15 +185,9 @@ static struct sock *udp6_lib_lookup2(struct net *net,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -228,11 +205,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
if (hslot->count > 10) {
- hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
@@ -243,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
- hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
@@ -267,23 +244,16 @@ begin:
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -719,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct sk_buff *nskb;
if (use_hash2) {
- hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
udptable->mask;
- hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -909,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+ unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 02556e356f87..e66b94f46532 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -59,7 +59,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+ top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 885ade234a49..09fb44ee3b45 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -265,7 +265,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
in6_dev_put(xdst->u.rt6.rt6i_idev);
xdst->u.rt6.rt6i_idev = loopback_idev;
in6_dev_hold(loopback_idev);
- xdst = (struct xfrm_dst *)xdst->u.dst.child;
+ xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst);
} while (xdst->u.dst.xfrm);
__in6_dev_put(loopback_idev);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 86c8dfef56a4..a5125624a76d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -257,9 +257,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
if (ret)
return NULL;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto err;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -269,7 +267,6 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
if (i++ == idx)
break;
}
-err:
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
@@ -513,9 +510,7 @@ void mesh_plink_broken(struct sta_info *sta)
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -535,7 +530,6 @@ void mesh_plink_broken(struct sta_info *sta)
WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
}
}
-out:
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -584,9 +578,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -597,7 +589,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
if (rcu_access_pointer(mpath->next_hop) == sta)
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -614,9 +606,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -627,7 +617,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
if (ether_addr_equal(mpath->mpp, proxy))
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -642,9 +632,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -653,7 +641,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
break;
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -873,9 +861,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -887,7 +873,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8ca9915befc8..5dce8336d33f 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2510,12 +2510,15 @@ static int __init mpls_init(void)
rtnl_af_register(&mpls_af_ops);
- rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
- rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
- rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
- 0);
- rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE,
+ mpls_rtm_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE,
+ mpls_rtm_delroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE,
+ mpls_getroute, mpls_dump_routes, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
+ mpls_netconf_get_devconf,
+ mpls_netconf_dump_devconf, 0);
err = ipgre_tunnel_encap_add_mpls_ops();
if (err)
pr_err("Can't add mpls over gre tunnel ops\n");
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index f8166c1d5430..3f1624ee056f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -251,11 +251,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (err)
return;
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
+ rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
@@ -306,9 +302,7 @@ static void nft_rhash_gc(struct work_struct *work)
if (err)
goto schedule;
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 2b4ab189bba7..5639fb03bdd9 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -93,7 +93,8 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
if (dst->xfrm == NULL)
return -1;
- for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+ for (i = 0; dst && dst->xfrm;
+ dst = ((struct xfrm_dst *)dst)->child, i++) {
pos = strict ? i : 0;
if (pos >= info->len)
return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 79cc1bf36e4a..972bfe113043 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -65,6 +65,7 @@
#include <linux/net_namespace.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
@@ -145,8 +146,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
static BLOCKING_NOTIFIER_HEAD(netlink_chain);
-static DEFINE_SPINLOCK(netlink_tap_lock);
-static struct list_head netlink_tap_all __read_mostly;
static const struct rhashtable_params netlink_rhashtable_params;
@@ -173,14 +172,24 @@ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
return new;
}
+static unsigned int netlink_tap_net_id;
+
+struct netlink_tap_net {
+ struct list_head netlink_tap_all;
+ struct mutex netlink_tap_lock;
+};
+
int netlink_add_tap(struct netlink_tap *nt)
{
+ struct net *net = dev_net(nt->dev);
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
+
if (unlikely(nt->dev->type != ARPHRD_NETLINK))
return -EINVAL;
- spin_lock(&netlink_tap_lock);
- list_add_rcu(&nt->list, &netlink_tap_all);
- spin_unlock(&netlink_tap_lock);
+ mutex_lock(&nn->netlink_tap_lock);
+ list_add_rcu(&nt->list, &nn->netlink_tap_all);
+ mutex_unlock(&nn->netlink_tap_lock);
__module_get(nt->module);
@@ -190,12 +199,14 @@ EXPORT_SYMBOL_GPL(netlink_add_tap);
static int __netlink_remove_tap(struct netlink_tap *nt)
{
+ struct net *net = dev_net(nt->dev);
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
bool found = false;
struct netlink_tap *tmp;
- spin_lock(&netlink_tap_lock);
+ mutex_lock(&nn->netlink_tap_lock);
- list_for_each_entry(tmp, &netlink_tap_all, list) {
+ list_for_each_entry(tmp, &nn->netlink_tap_all, list) {
if (nt == tmp) {
list_del_rcu(&nt->list);
found = true;
@@ -205,7 +216,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
pr_warn("__netlink_remove_tap: %p not found\n", nt);
out:
- spin_unlock(&netlink_tap_lock);
+ mutex_unlock(&nn->netlink_tap_lock);
if (found)
module_put(nt->module);
@@ -224,6 +235,26 @@ int netlink_remove_tap(struct netlink_tap *nt)
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);
+static __net_init int netlink_tap_init_net(struct net *net)
+{
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
+
+ INIT_LIST_HEAD(&nn->netlink_tap_all);
+ mutex_init(&nn->netlink_tap_lock);
+ return 0;
+}
+
+static void __net_exit netlink_tap_exit_net(struct net *net)
+{
+}
+
+static struct pernet_operations netlink_tap_net_ops = {
+ .init = netlink_tap_init_net,
+ .exit = netlink_tap_exit_net,
+ .id = &netlink_tap_net_id,
+ .size = sizeof(struct netlink_tap_net),
+};
+
static bool netlink_filter_tap(const struct sk_buff *skb)
{
struct sock *sk = skb->sk;
@@ -277,7 +308,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
return ret;
}
-static void __netlink_deliver_tap(struct sk_buff *skb)
+static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn)
{
int ret;
struct netlink_tap *tmp;
@@ -285,19 +316,21 @@ static void __netlink_deliver_tap(struct sk_buff *skb)
if (!netlink_filter_tap(skb))
return;
- list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
+ list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) {
ret = __netlink_deliver_tap_skb(skb, tmp->dev);
if (unlikely(ret))
break;
}
}
-static void netlink_deliver_tap(struct sk_buff *skb)
+static void netlink_deliver_tap(struct net *net, struct sk_buff *skb)
{
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
+
rcu_read_lock();
- if (unlikely(!list_empty(&netlink_tap_all)))
- __netlink_deliver_tap(skb);
+ if (unlikely(!list_empty(&nn->netlink_tap_all)))
+ __netlink_deliver_tap(skb, nn);
rcu_read_unlock();
}
@@ -306,7 +339,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
struct sk_buff *skb)
{
if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
- netlink_deliver_tap(skb);
+ netlink_deliver_tap(sock_net(dst), skb);
}
static void netlink_overrun(struct sock *sk)
@@ -1216,7 +1249,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
int len = skb->len;
- netlink_deliver_tap(skb);
+ netlink_deliver_tap(sock_net(sk), skb);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
@@ -2481,8 +2514,9 @@ static int netlink_walk_start(struct nl_seq_iter *iter)
return err;
}
- err = rhashtable_walk_start(&iter->hti);
- return err == -EAGAIN ? 0 : err;
+ rhashtable_walk_start(&iter->hti);
+
+ return 0;
}
static void netlink_walk_stop(struct nl_seq_iter *iter)
@@ -2733,12 +2767,11 @@ static int __init netlink_proto_init(void)
}
}
- INIT_LIST_HEAD(&netlink_tap_all);
-
netlink_add_usersock_entry();
sock_register(&netlink_family_ops);
register_pernet_subsys(&netlink_net_ops);
+ register_pernet_subsys(&netlink_tap_net_ops);
/* The netlink device handler may be needed early. */
rtnetlink_init();
out:
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 8faa20b4d457..7dda33b9b784 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -115,11 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
if (!s_num)
rhashtable_walk_enter(&tbl->hash, hti);
- ret = rhashtable_walk_start(hti);
- if (ret == -EAGAIN)
- ret = 0;
- if (ret)
- goto stop;
+ rhashtable_walk_start(hti);
while ((nlsk = rhashtable_walk_next(hti))) {
if (IS_ERR(nlsk)) {
@@ -146,8 +142,8 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
}
}
-stop:
rhashtable_walk_stop(hti);
+
if (ret)
goto done;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..76d050aba7a4 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -56,12 +56,12 @@
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
- struct timespec cur_ts;
+ struct timespec64 cur_ts;
u64 cur_ms, idle_ms;
- ktime_get_ts(&cur_ts);
+ ktime_get_ts64(&cur_ts);
idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
- cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+ cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
cur_ts.tv_nsec / NSEC_PER_MSEC;
return cur_ms - idle_ms;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 624ea74353dd..bce1f78b0de5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -644,12 +644,12 @@ static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
memset(&opts, 0, sizeof(opts));
- opts.index = nla_get_be32(attr);
+ opts.u.index = nla_get_be32(attr);
/* Index has only 20-bit */
- if (ntohl(opts.index) & ~INDEX_MASK) {
+ if (ntohl(opts.u.index) & ~INDEX_MASK) {
OVS_NLERR(log, "ERSPAN index number %x too large.",
- ntohl(opts.index));
+ ntohl(opts.u.index));
return -EINVAL;
}
@@ -907,7 +907,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
return -EMSGSIZE;
else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
- ((struct erspan_metadata *)tun_opts)->index))
+ ((struct erspan_metadata *)tun_opts)->u.index))
return -EMSGSIZE;
}
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 04a3128adcf0..3e7747549f90 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -126,18 +126,12 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
}
}
-static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
-{
- dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
-}
-
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_get_stats64 = internal_get_stats,
- .ndo_set_rx_headroom = internal_set_rx_headroom,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -154,7 +148,7 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
- IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
+ IFF_NO_QUEUE;
netdev->needs_free_netdev = true;
netdev->priv_destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
@@ -195,7 +189,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_netdev;
}
- vport->dev->needed_headroom = vport->dp->max_headroom;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index da754fc926e7..871eaf2cb85e 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -299,16 +299,21 @@ out:
int __init phonet_netlink_register(void)
{
- int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
- NULL, 0);
+ int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR,
+ addr_doit, NULL, 0);
if (err)
return err;
- /* Further __rtnl_register() cannot fail */
- __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0);
- __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0);
- __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0);
- __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0);
- __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0);
+ /* Further rtnl_register_module() cannot fail */
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR,
+ addr_doit, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETADDR,
+ NULL, getaddr_dumpit, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE,
+ route_doit, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
+ route_doit, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
+ NULL, route_dumpit, 0);
return 0;
}
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 77ab05e23001..5fb3929e3d7d 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1116,9 +1116,13 @@ static int __init qrtr_proto_init(void)
return rc;
}
- rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
+ rc = rtnl_register_module(THIS_MODULE, PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
+ if (rc) {
+ sock_unregister(qrtr_family.family);
+ proto_unregister(&qrtr_proto);
+ }
- return 0;
+ return rc;
}
postcore_initcall(qrtr_proto_init);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ee2d5d68b78..6492c0b608a4 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -230,8 +230,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
conn, &laddr, &faddr,
- trans->t_name ? trans->t_name : "[unknown]",
- is_outgoing ? "(outgoing)" : "");
+ strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
+ "[unknown]", is_outgoing ? "(outgoing)" : "");
/*
* Since we ran without holding the conn lock, someone could
@@ -366,6 +366,8 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
* to the conn hash, so we never trigger a reconnect on this
* conn - the reconnect is always triggered by the active peer. */
cancel_delayed_work_sync(&cp->cp_conn_w);
+ if (conn->c_destroy_in_prog)
+ return;
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
@@ -445,7 +447,6 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
- put_net(conn->c_net);
kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c349c71babff..d09f6c1facb4 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -150,7 +150,7 @@ struct rds_connection {
/* Protocol version */
unsigned int c_version;
- struct net *c_net;
+ possible_net_t c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
@@ -165,13 +165,13 @@ struct rds_connection {
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{
- return conn->c_net;
+ return read_pnet(&conn->c_net);
}
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{
- conn->c_net = get_net(net);
+ write_pnet(&conn->c_net, net);
}
#define RDS_FLAG_CONG_BITMAP 0x01
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 6b7ee71f40c6..39f502d47969 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -306,7 +306,8 @@ static void rds_tcp_conn_free(void *arg)
rdsdebug("freeing tc %p\n", tc);
spin_lock_irqsave(&rds_tcp_conn_lock, flags);
- list_del(&tc->t_tcp_node);
+ if (!tc->t_tcp_node_detached)
+ list_del(&tc->t_tcp_node);
spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
@@ -495,27 +496,6 @@ static struct pernet_operations rds_tcp_net_ops = {
.size = sizeof(struct rds_tcp_net),
};
-/* explicitly send a RST on each socket, thereby releasing any socket refcnts
- * that may otherwise hold up netns deletion.
- */
-static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
-{
- struct rds_conn_path *cp;
- struct rds_tcp_connection *tc;
- int i;
- struct sock *sk;
-
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
- cp = &conn->c_path[i];
- tc = cp->cp_transport_data;
- if (!tc->t_sock)
- continue;
- sk = tc->t_sock->sk;
- sk->sk_prot->disconnect(sk, 0);
- tcp_done(sk);
- }
-}
-
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
@@ -527,18 +507,20 @@ static void rds_tcp_kill_sock(struct net *net)
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = tc->t_cpath->cp_conn->c_net;
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
list_move_tail(&tc->t_tcp_node, &tmp_list);
+ } else {
+ list_del(&tc->t_tcp_node);
+ tc->t_tcp_node_detached = true;
+ }
}
spin_unlock_irq(&rds_tcp_conn_lock);
- list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+ list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
- }
}
void *rds_tcp_listen_sock_def_readable(struct net *net)
@@ -586,7 +568,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = tc->t_cpath->cp_conn->c_net;
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 1aafbf7c3011..e7858ee8ed8b 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
struct rds_tcp_connection {
struct list_head t_tcp_node;
+ bool t_tcp_node_detached;
struct rds_conn_path *t_cpath;
/* t_conn_path_lock synchronizes the connection establishment between
* rds_tcp_accept_one and rds_tcp_conn_path_connect
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4d33a50a8a6d..52622a3d2517 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -99,7 +99,7 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
p->tcfa_refcnt--;
if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
if (p->ops->cleanup)
- p->ops->cleanup(p, bind);
+ p->ops->cleanup(p);
tcf_idr_remove(p->idrinfo, p);
ret = ACT_P_DELETED;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5ef8ce8c83d4..b3f2c15affa7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -357,7 +357,7 @@ out:
return ret;
}
-static void tcf_bpf_cleanup(struct tc_action *act, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act)
{
struct tcf_bpf_cfg tmp;
@@ -401,16 +401,14 @@ static __net_init int bpf_init_net(struct net *net)
return tc_action_net_init(tn, &act_bpf_ops);
}
-static void __net_exit bpf_exit_net(struct net *net)
+static void __net_exit bpf_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, bpf_net_id);
}
static struct pernet_operations bpf_net_ops = {
.init = bpf_init_net,
- .exit = bpf_exit_net,
+ .exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 10b7a8855a6c..2b15ba84e0c8 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -209,16 +209,14 @@ static __net_init int connmark_init_net(struct net *net)
return tc_action_net_init(tn, &act_connmark_ops);
}
-static void __net_exit connmark_exit_net(struct net *net)
+static void __net_exit connmark_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, connmark_net_id);
}
static struct pernet_operations connmark_net_ops = {
.init = connmark_init_net,
- .exit = connmark_exit_net,
+ .exit_batch = connmark_exit_net,
.id = &connmark_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d836f998117b..af4b8ec60d9a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -635,16 +635,14 @@ static __net_init int csum_init_net(struct net *net)
return tc_action_net_init(tn, &act_csum_ops);
}
-static void __net_exit csum_exit_net(struct net *net)
+static void __net_exit csum_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, csum_net_id);
}
static struct pernet_operations csum_net_ops = {
.init = csum_init_net,
- .exit = csum_exit_net,
+ .exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc3..9d632e92cad0 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -235,16 +235,14 @@ static __net_init int gact_init_net(struct net *net)
return tc_action_net_init(tn, &act_gact_ops);
}
-static void __net_exit gact_exit_net(struct net *net)
+static void __net_exit gact_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, gact_net_id);
}
static struct pernet_operations gact_net_ops = {
.init = gact_init_net,
- .exit = gact_exit_net,
+ .exit_batch = gact_exit_net,
.id = &gact_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3007cb1310ea..5954e992685a 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -387,7 +387,7 @@ out_nlmsg_trim:
}
/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a, int bind)
+static void _tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_meta_info *e, *n;
@@ -405,13 +405,13 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
}
}
-static void tcf_ife_cleanup(struct tc_action *a, int bind)
+static void tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;
spin_lock_bh(&ife->tcf_lock);
- _tcf_ife_cleanup(a, bind);
+ _tcf_ife_cleanup(a);
spin_unlock_bh(&ife->tcf_lock);
p = rcu_dereference_protected(ife->params, 1);
@@ -546,7 +546,7 @@ metadata_parse_err:
if (exists)
tcf_idr_release(*a, bind);
if (ret == ACT_P_CREATED)
- _tcf_ife_cleanup(*a, bind);
+ _tcf_ife_cleanup(*a);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
@@ -567,7 +567,7 @@ metadata_parse_err:
err = use_all_metadata(ife);
if (err) {
if (ret == ACT_P_CREATED)
- _tcf_ife_cleanup(*a, bind);
+ _tcf_ife_cleanup(*a);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
@@ -858,16 +858,14 @@ static __net_init int ife_init_net(struct net *net)
return tc_action_net_init(tn, &act_ife_ops);
}
-static void __net_exit ife_exit_net(struct net *net)
+static void __net_exit ife_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, ife_net_id);
}
static struct pernet_operations ife_net_ops = {
.init = ife_init_net,
- .exit = ife_exit_net,
+ .exit_batch = ife_exit_net,
.id = &ife_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d9e399a7e3d5..06e380ae0928 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -77,7 +77,7 @@ static void ipt_destroy_target(struct xt_entry_target *t)
module_put(par.target->me);
}
-static void tcf_ipt_release(struct tc_action *a, int bind)
+static void tcf_ipt_release(struct tc_action *a)
{
struct tcf_ipt *ipt = to_ipt(a);
ipt_destroy_target(ipt->tcfi_t);
@@ -337,16 +337,14 @@ static __net_init int ipt_init_net(struct net *net)
return tc_action_net_init(tn, &act_ipt_ops);
}
-static void __net_exit ipt_exit_net(struct net *net)
+static void __net_exit ipt_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, ipt_net_id);
}
static struct pernet_operations ipt_net_ops = {
.init = ipt_init_net,
- .exit = ipt_exit_net,
+ .exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
};
@@ -387,16 +385,14 @@ static __net_init int xt_init_net(struct net *net)
return tc_action_net_init(tn, &act_xt_ops);
}
-static void __net_exit xt_exit_net(struct net *net)
+static void __net_exit xt_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, xt_net_id);
}
static struct pernet_operations xt_net_ops = {
.init = xt_init_net,
- .exit = xt_exit_net,
+ .exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e59388480..37e5e4decbd6 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -29,7 +29,6 @@
#include <net/tc_act/tc_mirred.h>
static LIST_HEAD(mirred_list);
-static DEFINE_SPINLOCK(mirred_list_lock);
static bool tcf_mirred_is_act_redirect(int action)
{
@@ -50,18 +49,15 @@ static bool tcf_mirred_act_wants_ingress(int action)
}
}
-static void tcf_mirred_release(struct tc_action *a, int bind)
+static void tcf_mirred_release(struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
struct net_device *dev;
- /* We could be called either in a RCU callback or with RTNL lock held. */
- spin_lock_bh(&mirred_list_lock);
list_del(&m->tcfm_list);
- dev = rcu_dereference_protected(m->tcfm_dev, 1);
+ dev = rtnl_dereference(m->tcfm_dev);
if (dev)
dev_put(dev);
- spin_unlock_bh(&mirred_list_lock);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -139,8 +135,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
- m->tcfm_ifindex = parm->ifindex;
- m->net = net;
if (ret != ACT_P_CREATED)
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
@@ -149,9 +143,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED) {
- spin_lock_bh(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
- spin_unlock_bh(&mirred_list_lock);
tcf_idr_insert(tn, *a);
}
@@ -247,13 +239,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = to_mirred(a);
+ struct net_device *dev = rtnl_dereference(m->tcfm_dev);
struct tc_mirred opt = {
.index = m->tcf_index,
.action = m->tcf_action,
.refcnt = m->tcf_refcnt - ref,
.bindcnt = m->tcf_bindcnt - bind,
.eaction = m->tcfm_eaction,
- .ifindex = m->tcfm_ifindex,
+ .ifindex = dev ? dev->ifindex : 0,
};
struct tcf_t t;
@@ -294,7 +287,6 @@ static int mirred_device_event(struct notifier_block *unused,
ASSERT_RTNL();
if (event == NETDEV_UNREGISTER) {
- spin_lock_bh(&mirred_list_lock);
list_for_each_entry(m, &mirred_list, tcfm_list) {
if (rcu_access_pointer(m->tcfm_dev) == dev) {
dev_put(dev);
@@ -304,7 +296,6 @@ static int mirred_device_event(struct notifier_block *unused,
RCU_INIT_POINTER(m->tcfm_dev, NULL);
}
}
- spin_unlock_bh(&mirred_list_lock);
}
return NOTIFY_DONE;
@@ -318,7 +309,7 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
- return __dev_get_by_index(m->net, m->tcfm_ifindex);
+ return rtnl_dereference(m->tcfm_dev);
}
static struct tc_action_ops act_mirred_ops = {
@@ -343,16 +334,14 @@ static __net_init int mirred_init_net(struct net *net)
return tc_action_net_init(tn, &act_mirred_ops);
}
-static void __net_exit mirred_exit_net(struct net *net)
+static void __net_exit mirred_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, mirred_net_id);
}
static struct pernet_operations mirred_net_ops = {
.init = mirred_init_net,
- .exit = mirred_exit_net,
+ .exit_batch = mirred_exit_net,
.id = &mirred_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c365d01b99c8..98c6a4b2f523 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -310,16 +310,14 @@ static __net_init int nat_init_net(struct net *net)
return tc_action_net_init(tn, &act_nat_ops);
}
-static void __net_exit nat_exit_net(struct net *net)
+static void __net_exit nat_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, nat_net_id);
}
static struct pernet_operations nat_net_ops = {
.init = nat_init_net,
- .exit = nat_exit_net,
+ .exit_batch = nat_exit_net,
.id = &nat_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 491fe5deb09e..349beaffb29e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -216,7 +216,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_pedit_cleanup(struct tc_action *a, int bind)
+static void tcf_pedit_cleanup(struct tc_action *a)
{
struct tcf_pedit *p = to_pedit(a);
struct tc_pedit_key *keys = p->tcfp_keys;
@@ -453,16 +453,14 @@ static __net_init int pedit_init_net(struct net *net)
return tc_action_net_init(tn, &act_pedit_ops);
}
-static void __net_exit pedit_exit_net(struct net *net)
+static void __net_exit pedit_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, pedit_net_id);
}
static struct pernet_operations pedit_net_ops = {
.init = pedit_init_net,
- .exit = pedit_exit_net,
+ .exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 3bb2ebf9e9ae..bf483db993a1 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -334,16 +334,14 @@ static __net_init int police_init_net(struct net *net)
return tc_action_net_init(tn, &act_police_ops);
}
-static void __net_exit police_exit_net(struct net *net)
+static void __net_exit police_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, police_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, police_net_id);
}
static struct pernet_operations police_net_ops = {
.init = police_init_net,
- .exit = police_exit_net,
+ .exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9438969290a6..1ba0df238756 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,7 +96,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
+static void tcf_sample_cleanup(struct tc_action *a)
{
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
@@ -236,16 +236,14 @@ static __net_init int sample_init_net(struct net *net)
return tc_action_net_init(tn, &act_sample_ops);
}
-static void __net_exit sample_exit_net(struct net *net)
+static void __net_exit sample_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, sample_net_id);
}
static struct pernet_operations sample_net_ops = {
.init = sample_init_net,
- .exit = sample_exit_net,
+ .exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7b57e5071a3..425eac11f6da 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
return d->tcf_action;
}
-static void tcf_simp_release(struct tc_action *a, int bind)
+static void tcf_simp_release(struct tc_action *a)
{
struct tcf_defact *d = to_defact(a);
kfree(d->tcfd_defdata);
@@ -204,16 +204,14 @@ static __net_init int simp_init_net(struct net *net)
return tc_action_net_init(tn, &act_simp_ops);
}
-static void __net_exit simp_exit_net(struct net *net)
+static void __net_exit simp_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, simp_net_id);
}
static struct pernet_operations simp_net_ops = {
.init = simp_init_net,
- .exit = simp_exit_net,
+ .exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 59949d61f20d..5a3f691bb545 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -241,16 +241,14 @@ static __net_init int skbedit_init_net(struct net *net)
return tc_action_net_init(tn, &act_skbedit_ops);
}
-static void __net_exit skbedit_exit_net(struct net *net)
+static void __net_exit skbedit_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, skbedit_net_id);
}
static struct pernet_operations skbedit_net_ops = {
.init = skbedit_init_net,
- .exit = skbedit_exit_net,
+ .exit_batch = skbedit_exit_net,
.id = &skbedit_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index b642ad3d39dd..fa975262dbac 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -184,7 +184,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+static void tcf_skbmod_cleanup(struct tc_action *a)
{
struct tcf_skbmod *d = to_skbmod(a);
struct tcf_skbmod_params *p;
@@ -266,16 +266,14 @@ static __net_init int skbmod_init_net(struct net *net)
return tc_action_net_init(tn, &act_skbmod_ops);
}
-static void __net_exit skbmod_exit_net(struct net *net)
+static void __net_exit skbmod_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, skbmod_net_id);
}
static struct pernet_operations skbmod_net_ops = {
.init = skbmod_init_net,
- .exit = skbmod_exit_net,
+ .exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 30c96274c638..0e23aac09ad6 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,7 +201,7 @@ err_out:
return ret;
}
-static void tunnel_key_release(struct tc_action *a, int bind)
+static void tunnel_key_release(struct tc_action *a)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
@@ -325,16 +325,14 @@ static __net_init int tunnel_key_init_net(struct net *net)
return tc_action_net_init(tn, &act_tunnel_key_ops);
}
-static void __net_exit tunnel_key_exit_net(struct net *net)
+static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, tunnel_key_net_id);
}
static struct pernet_operations tunnel_key_net_ops = {
.init = tunnel_key_init_net,
- .exit = tunnel_key_exit_net,
+ .exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 97f717a13ad5..e1a1b3f3983a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -219,7 +219,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_vlan_cleanup(struct tc_action *a, int bind)
+static void tcf_vlan_cleanup(struct tc_action *a)
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_vlan_params *p;
@@ -301,16 +301,14 @@ static __net_init int vlan_init_net(struct net *net)
return tc_action_net_init(tn, &act_vlan_ops);
}
-static void __net_exit vlan_exit_net(struct net *net)
+static void __net_exit vlan_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, vlan_net_id);
}
static struct pernet_operations vlan_net_ops = {
.init = vlan_init_net,
- .exit = vlan_exit_net,
+ .exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b91ea03e3afa..32b1ea7cf863 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -217,8 +217,12 @@ static void tcf_chain_flush(struct tcf_chain *chain)
static void tcf_chain_destroy(struct tcf_chain *chain)
{
+ struct tcf_block *block = chain->block;
+
list_del(&chain->list);
kfree(chain);
+ if (list_empty(&block->chain_list))
+ kfree(block);
}
static void tcf_chain_hold(struct tcf_chain *chain)
@@ -329,49 +333,34 @@ int tcf_block_get(struct tcf_block **p_block,
}
EXPORT_SYMBOL(tcf_block_get);
-static void tcf_block_put_final(struct work_struct *work)
-{
- struct tcf_block *block = container_of(work, struct tcf_block, work);
- struct tcf_chain *chain, *tmp;
-
- rtnl_lock();
-
- /* At this point, all the chains should have refcnt == 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_put(chain);
- rtnl_unlock();
- kfree(block);
-}
-
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
* actions should be all removed after flushing.
*/
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
- struct tcf_chain *chain;
+ struct tcf_chain *chain, *tmp;
- if (!block)
- return;
- /* Hold a refcnt for all chains, except 0, so that they don't disappear
+ /* Hold a refcnt for all chains, so that they don't disappear
* while we are iterating.
*/
+ if (!block)
+ return;
list_for_each_entry(chain, &block->chain_list, list)
- if (chain->index)
- tcf_chain_hold(chain);
+ tcf_chain_hold(chain);
list_for_each_entry(chain, &block->chain_list, list)
tcf_chain_flush(chain);
tcf_block_offload_unbind(block, q, ei);
- INIT_WORK(&block->work, tcf_block_put_final);
- /* Wait for existing RCU callbacks to cool down, make sure their works
- * have been queued before this. We can not flush pending works here
- * because we are holding the RTNL lock.
- */
- rcu_barrier();
- tcf_queue_work(&block->work);
+ /* At this point, all the chains should have refcnt >= 1. */
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_put(chain);
+
+ /* Finally, put chain 0 and allow block to be freed. */
+ chain = list_first_entry(&block->chain_list, struct tcf_chain, list);
+ tcf_chain_put(chain);
}
EXPORT_SYMBOL(tcf_block_put_ext);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 543a3e875d05..6132a7317efa 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -166,6 +166,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
* so do it rather here.
*/
skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key);
skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0f1eab99ff4e..74c22b4e365e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -799,7 +799,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto nla_put_failure;
- qlen = q->q.qlen;
+
+ qlen = qdisc_qlen_sum(q);
stab = rtnl_dereference(q->stab);
if (stab && qdisc_dump_stab(skb, stab) < 0)
@@ -956,6 +957,11 @@ skip:
} else {
const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+ /* Only support running class lockless if parent is lockless */
+ if (new && (new->flags & TCQ_F_NOLOCK) &&
+ parent && !(parent->flags & TCQ_F_NOLOCK))
+ new->flags &= ~TCQ_F_NOLOCK;
+
err = -EOPNOTSUPP;
if (cops && cops->graft) {
unsigned long cl = cops->find(parent, classid);
@@ -1022,7 +1028,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
#endif
err = -ENOENT;
- if (ops == NULL)
+ if (!ops)
goto err_out;
sch = qdisc_alloc(dev_queue, ops);
@@ -1062,54 +1068,60 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
}
- if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
- if (qdisc_is_percpu_stats(sch)) {
- sch->cpu_bstats =
- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
- if (!sch->cpu_bstats)
- goto err_out4;
+ if (ops->init) {
+ err = ops->init(sch, tca[TCA_OPTIONS]);
+ if (err != 0)
+ goto err_out5;
+ }
- sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
- if (!sch->cpu_qstats)
- goto err_out4;
- }
+ if (qdisc_is_percpu_stats(sch)) {
+ sch->cpu_bstats =
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!sch->cpu_bstats)
+ goto err_out4;
- if (tca[TCA_STAB]) {
- stab = qdisc_get_stab(tca[TCA_STAB]);
- if (IS_ERR(stab)) {
- err = PTR_ERR(stab);
- goto err_out4;
- }
- rcu_assign_pointer(sch->stab, stab);
- }
- if (tca[TCA_RATE]) {
- seqcount_t *running;
-
- err = -EOPNOTSUPP;
- if (sch->flags & TCQ_F_MQROOT)
- goto err_out4;
+ sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!sch->cpu_qstats)
+ goto err_out4;
+ }
- if ((sch->parent != TC_H_ROOT) &&
- !(sch->flags & TCQ_F_INGRESS) &&
- (!p || !(p->flags & TCQ_F_MQROOT)))
- running = qdisc_root_sleeping_running(sch);
- else
- running = &sch->running;
-
- err = gen_new_estimator(&sch->bstats,
- sch->cpu_bstats,
- &sch->rate_est,
- NULL,
- running,
- tca[TCA_RATE]);
- if (err)
- goto err_out4;
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB]);
+ if (IS_ERR(stab)) {
+ err = PTR_ERR(stab);
+ goto err_out4;
}
+ rcu_assign_pointer(sch->stab, stab);
+ }
+ if (tca[TCA_RATE]) {
+ seqcount_t *running;
- qdisc_hash_add(sch, false);
+ err = -EOPNOTSUPP;
+ if (sch->flags & TCQ_F_MQROOT)
+ goto err_out4;
- return sch;
+ if (sch->parent != TC_H_ROOT &&
+ !(sch->flags & TCQ_F_INGRESS) &&
+ (!p || !(p->flags & TCQ_F_MQROOT)))
+ running = qdisc_root_sleeping_running(sch);
+ else
+ running = &sch->running;
+
+ err = gen_new_estimator(&sch->bstats,
+ sch->cpu_bstats,
+ &sch->rate_est,
+ NULL,
+ running,
+ tca[TCA_RATE]);
+ if (err)
+ goto err_out4;
}
+
+ qdisc_hash_add(sch, false);
+
+ return sch;
+
+err_out5:
/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
if (ops->destroy)
ops->destroy(sch);
@@ -1141,7 +1153,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
int err = 0;
if (tca[TCA_OPTIONS]) {
- if (sch->ops->change == NULL)
+ if (!sch->ops->change)
return -EINVAL;
err = sch->ops->change(sch, tca[TCA_OPTIONS]);
if (err)
@@ -1346,7 +1358,8 @@ replay:
goto create_n_graft;
if (n->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
- if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id))
return -EINVAL;
if (q == p ||
(p && check_loop(q, p, 0)))
@@ -1391,7 +1404,7 @@ replay:
}
/* Change qdisc parameters */
- if (q == NULL)
+ if (!q)
return -ENOENT;
if (n->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cd1b200acae7..981c08fe810b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
@@ -47,9 +48,70 @@ EXPORT_SYMBOL(default_qdisc_ops);
* - updates to tree and tree walking are only done under the rtnl mutex.
*/
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
+{
+ const struct netdev_queue *txq = q->dev_queue;
+ spinlock_t *lock = NULL;
+ struct sk_buff *skb;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
+ }
+
+ skb = skb_peek(&q->skb_bad_txq);
+ if (skb) {
+ /* check the reason of requeuing without tx lock first */
+ txq = skb_get_tx_queue(txq->dev, skb);
+ if (!netif_xmit_frozen_or_stopped(txq)) {
+ skb = __skb_dequeue(&q->skb_bad_txq);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_dec(q, skb);
+ qdisc_qstats_cpu_qlen_dec(q);
+ } else {
+ qdisc_qstats_backlog_dec(q, skb);
+ q->q.qlen--;
+ }
+ } else {
+ skb = NULL;
+ }
+ }
+
+ if (lock)
+ spin_unlock(lock);
+
+ return skb;
+}
+
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
+{
+ struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
+
+ if (unlikely(skb))
+ skb = __skb_dequeue_bad_txq(q);
+
+ return skb;
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
+ struct sk_buff *skb)
{
- q->gso_skb = skb;
+ spinlock_t *lock = NULL;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
+ }
+
+ __skb_queue_tail(&q->skb_bad_txq, skb);
+
+ if (lock)
+ spin_unlock(lock);
+}
+
+static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+{
+ __skb_queue_head(&q->gso_skb, skb);
q->qstats.requeues++;
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++; /* it's still part of the queue */
@@ -58,6 +120,30 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
return 0;
}
+static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
+{
+ spinlock_t *lock = qdisc_lock(q);
+
+ spin_lock(lock);
+ __skb_queue_tail(&q->gso_skb, skb);
+ spin_unlock(lock);
+
+ qdisc_qstats_cpu_requeues_inc(q);
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ __netif_schedule(q);
+
+ return 0;
+}
+
+static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+{
+ if (q->flags & TCQ_F_NOLOCK)
+ return dev_requeue_skb_locked(skb, q);
+ else
+ return __dev_requeue_skb(skb, q);
+}
+
static void try_bulk_dequeue_skb(struct Qdisc *q,
struct sk_buff *skb,
const struct netdev_queue *txq,
@@ -95,9 +181,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
if (!nskb)
break;
if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
- q->skb_bad_txq = nskb;
- qdisc_qstats_backlog_inc(q, nskb);
- q->q.qlen++;
+ qdisc_enqueue_skb_bad_txq(q, nskb);
+
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_inc(q, nskb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ qdisc_qstats_backlog_inc(q, nskb);
+ q->q.qlen++;
+ }
break;
}
skb->next = nskb;
@@ -113,40 +205,60 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
int *packets)
{
- struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue;
+ struct sk_buff *skb = NULL;
*packets = 1;
- if (unlikely(skb)) {
+ if (unlikely(!skb_queue_empty(&q->gso_skb))) {
+ spinlock_t *lock = NULL;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
+ }
+
+ skb = skb_peek(&q->gso_skb);
+
+ /* skb may be null if another cpu pulls gso_skb off in between
+ * empty check and lock.
+ */
+ if (!skb) {
+ if (lock)
+ spin_unlock(lock);
+ goto validate;
+ }
+
/* skb in gso_skb were already validated */
*validate = false;
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
- q->gso_skb = NULL;
- qdisc_qstats_backlog_dec(q, skb);
- q->q.qlen--;
- } else
+ skb = __skb_dequeue(&q->gso_skb);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_dec(q, skb);
+ qdisc_qstats_cpu_qlen_dec(q);
+ } else {
+ qdisc_qstats_backlog_dec(q, skb);
+ q->q.qlen--;
+ }
+ } else {
skb = NULL;
- goto trace;
- }
- *validate = true;
- skb = q->skb_bad_txq;
- if (unlikely(skb)) {
- /* check the reason of requeuing without tx lock first */
- txq = skb_get_tx_queue(txq->dev, skb);
- if (!netif_xmit_frozen_or_stopped(txq)) {
- q->skb_bad_txq = NULL;
- qdisc_qstats_backlog_dec(q, skb);
- q->q.qlen--;
- goto bulk;
}
- skb = NULL;
+ if (lock)
+ spin_unlock(lock);
goto trace;
}
- if (!(q->flags & TCQ_F_ONETXQUEUE) ||
- !netif_xmit_frozen_or_stopped(txq))
- skb = q->dequeue(q);
+validate:
+ *validate = true;
+
+ if ((q->flags & TCQ_F_ONETXQUEUE) &&
+ netif_xmit_frozen_or_stopped(txq))
+ return skb;
+
+ skb = qdisc_dequeue_skb_bad_txq(q);
+ if (unlikely(skb))
+ goto bulk;
+ skb = q->dequeue(q);
if (skb) {
bulk:
if (qdisc_may_bulk(q))
@@ -165,17 +277,18 @@ trace:
* only one CPU can execute this function.
*
* Returns to the caller:
- * 0 - queue is empty or throttled.
- * >0 - queue is not empty.
+ * false - hardware queue frozen backoff
+ * true - feel free to send more pkts
*/
-int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
- struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock, bool validate)
+bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+ struct net_device *dev, struct netdev_queue *txq,
+ spinlock_t *root_lock, bool validate)
{
int ret = NETDEV_TX_BUSY;
/* And release qdisc */
- spin_unlock(root_lock);
+ if (root_lock)
+ spin_unlock(root_lock);
/* Note that we validate skb (GSO, checksum, ...) outside of locks */
if (validate)
@@ -188,27 +301,28 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_UNLOCK(dev, txq);
} else {
- spin_lock(root_lock);
- return qdisc_qlen(q);
+ if (root_lock)
+ spin_lock(root_lock);
+ return true;
}
- spin_lock(root_lock);
- if (dev_xmit_complete(ret)) {
- /* Driver sent out skb successfully or skb was consumed */
- ret = qdisc_qlen(q);
- } else {
+ if (root_lock)
+ spin_lock(root_lock);
+
+ if (!dev_xmit_complete(ret)) {
/* Driver returned NETDEV_TX_BUSY - requeue skb */
if (unlikely(ret != NETDEV_TX_BUSY))
net_warn_ratelimited("BUG %s code %d qlen %d\n",
dev->name, ret, q->q.qlen);
- ret = dev_requeue_skb(skb, q);
+ dev_requeue_skb(skb, q);
+ return false;
}
if (ret && netif_xmit_frozen_or_stopped(txq))
- ret = 0;
+ return false;
- return ret;
+ return true;
}
/*
@@ -230,20 +344,22 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
* >0 - queue is not empty.
*
*/
-static inline int qdisc_restart(struct Qdisc *q, int *packets)
+static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
+ spinlock_t *root_lock = NULL;
struct netdev_queue *txq;
struct net_device *dev;
- spinlock_t *root_lock;
struct sk_buff *skb;
bool validate;
/* Dequeue packet */
skb = dequeue_skb(q, &validate, packets);
if (unlikely(!skb))
- return 0;
+ return false;
+
+ if (!(q->flags & TCQ_F_NOLOCK))
+ root_lock = qdisc_lock(q);
- root_lock = qdisc_lock(q);
dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb);
@@ -267,8 +383,6 @@ void __qdisc_run(struct Qdisc *q)
break;
}
}
-
- qdisc_run_end(q);
}
unsigned long dev_trans_start(struct net_device *dev)
@@ -468,93 +582,93 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = {
/*
* Private data for a pfifo_fast scheduler containing:
- * - queues for the three band
- * - bitmap indicating which of the bands contain skbs
+ * - rings for priority bands
*/
struct pfifo_fast_priv {
- u32 bitmap;
- struct qdisc_skb_head q[PFIFO_FAST_BANDS];
+ struct skb_array q[PFIFO_FAST_BANDS];
};
-/*
- * Convert a bitmap to the first band number where an skb is queued, where:
- * bitmap=0 means there are no skbs on any band.
- * bitmap=1 means there is an skb on band 0.
- * bitmap=7 means there are skbs on all 3 bands, etc.
- */
-static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
-
-static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
- int band)
+static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
+ int band)
{
- return priv->q + band;
+ return &priv->q[band];
}
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
- if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
- int band = prio2band[skb->priority & TC_PRIO_MAX];
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- struct qdisc_skb_head *list = band2list(priv, band);
-
- priv->bitmap |= (1 << band);
- qdisc->q.qlen++;
- return __qdisc_enqueue_tail(skb, qdisc, list);
- }
+ int band = prio2band[skb->priority & TC_PRIO_MAX];
+ struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ struct skb_array *q = band2list(priv, band);
+ int err;
+
+ err = skb_array_produce(q, skb);
- return qdisc_drop(skb, qdisc, to_free);
+ if (unlikely(err))
+ return qdisc_drop_cpu(skb, qdisc, to_free);
+
+ qdisc_qstats_cpu_qlen_inc(qdisc);
+ qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+ return NET_XMIT_SUCCESS;
}
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
-
- if (likely(band >= 0)) {
- struct qdisc_skb_head *qh = band2list(priv, band);
- struct sk_buff *skb = __qdisc_dequeue_head(qh);
+ struct sk_buff *skb = NULL;
+ int band;
- if (likely(skb != NULL)) {
- qdisc_qstats_backlog_dec(qdisc, skb);
- qdisc_bstats_update(qdisc, skb);
- }
+ for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
+ struct skb_array *q = band2list(priv, band);
- qdisc->q.qlen--;
- if (qh->qlen == 0)
- priv->bitmap &= ~(1 << band);
+ if (__skb_array_empty(q))
+ continue;
- return skb;
+ skb = skb_array_consume_bh(q);
+ }
+ if (likely(skb)) {
+ qdisc_qstats_cpu_backlog_dec(qdisc, skb);
+ qdisc_bstats_cpu_update(qdisc, skb);
+ qdisc_qstats_cpu_qlen_dec(qdisc);
}
- return NULL;
+ return skb;
}
static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
+ struct sk_buff *skb = NULL;
+ int band;
- if (band >= 0) {
- struct qdisc_skb_head *qh = band2list(priv, band);
+ for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
+ struct skb_array *q = band2list(priv, band);
- return qh->head;
+ skb = __skb_array_peek(q);
}
- return NULL;
+ return skb;
}
static void pfifo_fast_reset(struct Qdisc *qdisc)
{
- int prio;
+ int i, band;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(band2list(priv, prio));
+ for (band = 0; band < PFIFO_FAST_BANDS; band++) {
+ struct skb_array *q = band2list(priv, band);
+ struct sk_buff *skb;
- priv->bitmap = 0;
- qdisc->qstats.backlog = 0;
- qdisc->q.qlen = 0;
+ while ((skb = skb_array_consume_bh(q)) != NULL)
+ kfree_skb(skb);
+ }
+
+ for_each_possible_cpu(i) {
+ struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
+
+ q->backlog = 0;
+ q->qlen = 0;
+ }
}
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
@@ -572,17 +686,48 @@ nla_put_failure:
static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
- int prio;
+ unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ int prio;
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- qdisc_skb_head_init(band2list(priv, prio));
+ /* guard against zero length rings */
+ if (!qlen)
+ return -EINVAL;
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+ int err;
+
+ err = skb_array_init(q, qlen, GFP_KERNEL);
+ if (err)
+ return -ENOMEM;
+ }
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
return 0;
}
+static void pfifo_fast_destroy(struct Qdisc *sch)
+{
+ struct pfifo_fast_priv *priv = qdisc_priv(sch);
+ int prio;
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+
+ /* NULL ring is possible if destroy path is due to a failed
+ * skb_array_init() in pfifo_fast_init() case.
+ */
+ if (!&q->ring.queue)
+ continue;
+ /* Destroy ring but no need to kfree_skb because a call to
+ * pfifo_fast_reset() has already done that work.
+ */
+ ptr_ring_cleanup(&q->ring, NULL);
+ }
+}
+
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.id = "pfifo_fast",
.priv_size = sizeof(struct pfifo_fast_priv),
@@ -590,9 +735,11 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.dequeue = pfifo_fast_dequeue,
.peek = pfifo_fast_peek,
.init = pfifo_fast_init,
+ .destroy = pfifo_fast_destroy,
.reset = pfifo_fast_reset,
.dump = pfifo_fast_dump,
.owner = THIS_MODULE,
+ .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);
@@ -630,9 +777,24 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
sch->padded = (char *) sch - (char *) p;
}
+ __skb_queue_head_init(&sch->gso_skb);
+ __skb_queue_head_init(&sch->skb_bad_txq);
qdisc_skb_head_init(&sch->q);
spin_lock_init(&sch->q.lock);
+ if (ops->static_flags & TCQ_F_CPUSTATS) {
+ sch->cpu_bstats =
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!sch->cpu_bstats)
+ goto errout1;
+
+ sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!sch->cpu_qstats) {
+ free_percpu(sch->cpu_bstats);
+ goto errout1;
+ }
+ }
+
spin_lock_init(&sch->busylock);
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -642,6 +804,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
dev->qdisc_running_key ?: &qdisc_running_key);
sch->ops = ops;
+ sch->flags = ops->static_flags;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
@@ -649,6 +812,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
refcount_set(&sch->refcnt, 1);
return sch;
+errout1:
+ kfree(p);
errout:
return ERR_PTR(err);
}
@@ -682,17 +847,21 @@ EXPORT_SYMBOL(qdisc_create_dflt);
void qdisc_reset(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct sk_buff *skb, *tmp;
if (ops->reset)
ops->reset(qdisc);
- kfree_skb(qdisc->skb_bad_txq);
- qdisc->skb_bad_txq = NULL;
+ skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
+ __skb_unlink(skb, &qdisc->gso_skb);
+ kfree_skb_list(skb);
+ }
- if (qdisc->gso_skb) {
- kfree_skb_list(qdisc->gso_skb);
- qdisc->gso_skb = NULL;
+ skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+ __skb_unlink(skb, &qdisc->skb_bad_txq);
+ kfree_skb_list(skb);
}
+
qdisc->q.qlen = 0;
qdisc->qstats.backlog = 0;
}
@@ -711,6 +880,7 @@ static void qdisc_free(struct Qdisc *qdisc)
void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct sk_buff *skb, *tmp;
if (qdisc->flags & TCQ_F_BUILTIN ||
!refcount_dec_and_test(&qdisc->refcnt))
@@ -730,8 +900,16 @@ void qdisc_destroy(struct Qdisc *qdisc)
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- kfree_skb_list(qdisc->gso_skb);
- kfree_skb(qdisc->skb_bad_txq);
+ skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
+ __skb_unlink(skb, &qdisc->gso_skb);
+ kfree_skb_list(skb);
+ }
+
+ skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+ __skb_unlink(skb, &qdisc->skb_bad_txq);
+ kfree_skb_list(skb);
+ }
+
qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);
@@ -746,10 +924,6 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
root_lock = qdisc_lock(oqdisc);
spin_lock_bh(root_lock);
- /* Prune old scheduler */
- if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1)
- qdisc_reset(oqdisc);
-
/* ... and graft new one */
if (qdisc == NULL)
qdisc = &noop_qdisc;
@@ -885,14 +1059,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
dev_queue = netdev_get_tx_queue(dev, i);
q = dev_queue->qdisc_sleeping;
- root_lock = qdisc_lock(q);
- spin_lock_bh(root_lock);
+ if (q->flags & TCQ_F_NOLOCK) {
+ val = test_bit(__QDISC_STATE_SCHED, &q->state);
+ } else {
+ root_lock = qdisc_lock(q);
+ spin_lock_bh(root_lock);
- val = (qdisc_is_running(q) ||
- test_bit(__QDISC_STATE_SCHED, &q->state));
+ val = (qdisc_is_running(q) ||
+ test_bit(__QDISC_STATE_SCHED, &q->state));
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(root_lock);
+ }
if (val)
return true;
@@ -900,6 +1078,16 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
+static void dev_qdisc_reset(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *none)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+ if (qdisc)
+ qdisc_reset(qdisc);
+}
+
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@@ -910,7 +1098,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
void dev_deactivate_many(struct list_head *head)
{
struct net_device *dev;
- bool sync_needed = false;
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
@@ -920,20 +1107,25 @@ void dev_deactivate_many(struct list_head *head)
&noop_qdisc);
dev_watchdog_down(dev);
- sync_needed |= !dev->dismantle;
}
/* Wait for outstanding qdisc-less dev_queue_xmit calls.
* This is avoided if all devices are in dismantle phase :
* Caller will call synchronize_net() for us
*/
- if (sync_needed)
- synchronize_net();
+ synchronize_net();
/* Wait for outstanding qdisc_run calls. */
- list_for_each_entry(dev, head, close_list)
+ list_for_each_entry(dev, head, close_list) {
while (some_qdisc_is_busy(dev))
yield();
+ /* The new qdisc is assigned at this point so we can safely
+ * unwind stale skb lists and qdisc statistics
+ */
+ netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
+ if (dev_ingress_queue(dev))
+ dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
+ }
}
void dev_deactivate(struct net_device *dev)
@@ -954,6 +1146,8 @@ static void dev_init_scheduler_queue(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, qdisc);
dev_queue->qdisc_sleeping = qdisc;
+ __skb_queue_head_init(&qdisc->gso_skb);
+ __skb_queue_head_init(&qdisc->skb_bad_txq);
}
void dev_init_scheduler(struct net_device *dev)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 213b586a06a0..8cbb5c829d59 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
struct mq_sched {
struct Qdisc **qdiscs;
@@ -97,23 +98,42 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
struct net_device *dev = qdisc_dev(sch);
struct Qdisc *qdisc;
unsigned int ntx;
+ __u32 qlen = 0;
sch->q.qlen = 0;
memset(&sch->bstats, 0, sizeof(sch->bstats));
memset(&sch->qstats, 0, sizeof(sch->qstats));
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+ * to account for all, none, or a mix of locked and unlocked child
+ * qdiscs. Percpu stats are added to counters in-band and locking
+ * qdisc totals are added at end.
+ */
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
- sch->q.qlen += qdisc->q.qlen;
- sch->bstats.bytes += qdisc->bstats.bytes;
- sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.backlog += qdisc->qstats.backlog;
- sch->qstats.drops += qdisc->qstats.drops;
- sch->qstats.requeues += qdisc->qstats.requeues;
- sch->qstats.overlimits += qdisc->qstats.overlimits;
+
+ if (qdisc_is_percpu_stats(qdisc)) {
+ qlen = qdisc_qlen_sum(qdisc);
+ __gnet_stats_copy_basic(NULL, &sch->bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&sch->qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats, qlen);
+ } else {
+ sch->q.qlen += qdisc->q.qlen;
+ sch->bstats.bytes += qdisc->bstats.bytes;
+ sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.backlog += qdisc->qstats.backlog;
+ sch->qstats.drops += qdisc->qstats.drops;
+ sch->qstats.requeues += qdisc->qstats.requeues;
+ sch->qstats.overlimits += qdisc->qstats.overlimits;
+ }
+
spin_unlock_bh(qdisc_lock(qdisc));
}
+
return 0;
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b85885a9d8a1..8622745f3cd9 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -388,22 +388,40 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
- unsigned int i;
+ unsigned int ntx, tc;
sch->q.qlen = 0;
memset(&sch->bstats, 0, sizeof(sch->bstats));
memset(&sch->qstats, 0, sizeof(sch->qstats));
- for (i = 0; i < dev->num_tx_queues; i++) {
- qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+ * to account for all, none, or a mix of locked and unlocked child
+ * qdiscs. Percpu stats are added to counters in-band and locking
+ * qdisc totals are added at end.
+ */
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
- sch->q.qlen += qdisc->q.qlen;
- sch->bstats.bytes += qdisc->bstats.bytes;
- sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.backlog += qdisc->qstats.backlog;
- sch->qstats.drops += qdisc->qstats.drops;
- sch->qstats.requeues += qdisc->qstats.requeues;
- sch->qstats.overlimits += qdisc->qstats.overlimits;
+
+ if (qdisc_is_percpu_stats(qdisc)) {
+ __u32 qlen = qdisc_qlen_sum(qdisc);
+
+ __gnet_stats_copy_basic(NULL, &sch->bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&sch->qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats, qlen);
+ } else {
+ sch->q.qlen += qdisc->q.qlen;
+ sch->bstats.bytes += qdisc->bstats.bytes;
+ sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.backlog += qdisc->qstats.backlog;
+ sch->qstats.drops += qdisc->qstats.drops;
+ sch->qstats.requeues += qdisc->qstats.requeues;
+ sch->qstats.overlimits += qdisc->qstats.overlimits;
+ }
+
spin_unlock_bh(qdisc_lock(qdisc));
}
@@ -411,9 +429,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
opt.hw = priv->hw_offload;
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- opt.count[i] = dev->tc_to_txq[i].count;
- opt.offset[i] = dev->tc_to_txq[i].offset;
+ for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
+ opt.count[tc] = dev->tc_to_txq[tc].count;
+ opt.offset[tc] = dev->tc_to_txq[tc].offset;
}
if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
@@ -495,7 +513,6 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (cl >= TC_H_MIN_PRIORITY) {
int i;
__u32 qlen = 0;
- struct Qdisc *qdisc;
struct gnet_stats_queue qstats = {0};
struct gnet_stats_basic_packed bstats = {0};
struct net_device *dev = qdisc_dev(sch);
@@ -511,18 +528,26 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+ struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+ struct gnet_stats_queue __percpu *cpu_qstats = NULL;
- qdisc = rtnl_dereference(q->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
- qlen += qdisc->q.qlen;
- bstats.bytes += qdisc->bstats.bytes;
- bstats.packets += qdisc->bstats.packets;
- qstats.backlog += qdisc->qstats.backlog;
- qstats.drops += qdisc->qstats.drops;
- qstats.requeues += qdisc->qstats.requeues;
- qstats.overlimits += qdisc->qstats.overlimits;
+ if (qdisc_is_percpu_stats(qdisc)) {
+ cpu_bstats = qdisc->cpu_bstats;
+ cpu_qstats = qdisc->cpu_qstats;
+ }
+
+ qlen = qdisc_qlen_sum(qdisc);
+ __gnet_stats_copy_basic(NULL, &sch->bstats,
+ cpu_bstats, &qdisc->bstats);
+ __gnet_stats_copy_queue(&sch->qstats,
+ cpu_qstats,
+ &qdisc->qstats,
+ qlen);
spin_unlock_bh(qdisc_lock(qdisc));
}
+
/* Reclaim root sleeping lock before completing stats */
if (d->lock)
spin_lock_bh(d->lock);
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 1ca84a288443..54bd9c1a8aa1 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -14,7 +14,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o stream.o auth.o \
offload.o stream_sched.o stream_sched_prio.o \
- stream_sched_rr.o
+ stream_sched_rr.o stream_interleave.o
sctp_probe-y := probe.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 69394f4d6091..837806dd5799 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -861,7 +861,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
- sctp_ulpq_tail_event(&asoc->ulpq, event);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
}
/* Select new active and retran paths. */
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7f8baa48e7c2..991a530c6b31 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -124,7 +124,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
error, GFP_ATOMIC);
if (ev)
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
sctp_chunk_put(chunk);
@@ -191,7 +191,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
*/
max_data = asoc->pathmtu -
sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+ sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream);
max_data = SCTP_TRUNC4(max_data);
/* If the the peer requested that we authenticate DATA chunks
@@ -264,8 +264,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
frag |= SCTP_DATA_SACK_IMM;
}
- chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag,
- 0, GFP_KERNEL);
+ chunk = asoc->stream.si->make_datafrag(asoc, sinfo, len, frag,
+ GFP_KERNEL);
if (!chunk) {
err = -ENOMEM;
goto errout;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 4a865cd06d76..01a26ee051e3 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -313,6 +313,7 @@ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
/* We believe that this chunk is OK to add to the packet */
switch (chunk->chunk_hdr->type) {
case SCTP_CID_DATA:
+ case SCTP_CID_I_DATA:
/* Account for the data being in the packet */
sctp_packet_append_data(packet, chunk);
/* Disallow SACK bundling after DATA. */
@@ -724,7 +725,7 @@ static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
* or delay in hopes of bundling a full sized packet.
*/
if (chunk->skb->len + q->out_qlen > transport->pathmtu -
- packet->overhead - sizeof(struct sctp_data_chunk) - 4)
+ packet->overhead - sctp_datachk_len(&chunk->asoc->stream) - 4)
/* Enough data queued to fill a packet */
return SCTP_XMIT_OK;
@@ -759,7 +760,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
asoc->peer.rwnd = rwnd;
sctp_chunk_assign_tsn(chunk);
- sctp_chunk_assign_ssn(chunk);
+ asoc->stream.si->assign_number(chunk);
}
static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7d67feeeffc1..af9b5ebcae50 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -67,8 +67,6 @@ static void sctp_mark_missing(struct sctp_outq *q,
__u32 highest_new_tsn,
int count_of_newacks);
-static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
-
static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
@@ -591,7 +589,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
* following the procedures outlined in C1 - C5.
*/
if (reason == SCTP_RTXR_T3_RTX)
- sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
+ q->asoc->stream.si->generate_ftsn(q, q->asoc->ctsn_ack_point);
/* Flush the queues only on timeout, since fast_rtx is only
* triggered during sack processing and the queue
@@ -942,6 +940,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
case SCTP_CID_ECN_ECNE:
case SCTP_CID_ASCONF:
case SCTP_CID_FWD_TSN:
+ case SCTP_CID_I_FWD_TSN:
case SCTP_CID_RECONF:
status = sctp_packet_transmit_chunk(packet, chunk,
one_packet, gfp);
@@ -956,7 +955,8 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
* sender MUST assure that at least one T3-rtx
* timer is running.
*/
- if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+ if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN ||
+ chunk->chunk_hdr->type == SCTP_CID_I_FWD_TSN) {
sctp_transport_reset_t3_rtx(transport);
transport->last_time_sent = jiffies;
}
@@ -1372,7 +1372,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
asoc->peer.rwnd = sack_a_rwnd;
- sctp_generate_fwdtsn(q, sack_ctsn);
+ asoc->stream.si->generate_ftsn(q, sack_ctsn);
pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn);
pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, "
@@ -1795,7 +1795,7 @@ static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist,
}
/* Create and add a fwdtsn chunk to the outq's control queue if needed. */
-static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
+void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
{
struct sctp_association *asoc = q->asoc;
struct sctp_chunk *ftsn_chunk = NULL;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 26b4be6b4172..4545bc2aff84 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -288,12 +288,8 @@ struct sctp_ht_iter {
static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
{
struct sctp_ht_iter *iter = seq->private;
- int err = sctp_transport_walk_start(&iter->hti);
- if (err) {
- iter->start_fail = 1;
- return ERR_PTR(err);
- }
+ sctp_transport_walk_start(&iter->hti);
iter->start_fail = 0;
return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 9bf575f2e8ed..b9b269cf615e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
struct sctp_inithdr init;
union sctp_params addrs;
struct sctp_sock *sp;
- __u8 extensions[4];
+ __u8 extensions[5];
size_t chunksize;
__be16 types[2];
int num_ext = 0;
@@ -278,6 +278,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
if (sp->adaptation_ind)
chunksize += sizeof(aiparam);
+ if (sp->strm_interleave) {
+ extensions[num_ext] = SCTP_CID_I_DATA;
+ num_ext += 1;
+ }
+
chunksize += vparam_len;
/* Account for AUTH related parameters */
@@ -392,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
struct sctp_inithdr initack;
union sctp_params addrs;
struct sctp_sock *sp;
- __u8 extensions[4];
+ __u8 extensions[5];
size_t chunksize;
int num_ext = 0;
int cookie_len;
@@ -442,6 +447,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
if (sp->adaptation_ind)
chunksize += sizeof(aiparam);
+ if (asoc->intl_enable) {
+ extensions[num_ext] = SCTP_CID_I_DATA;
+ num_ext += 1;
+ }
+
if (asoc->peer.auth_capable) {
auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
chunksize += ntohs(auth_random->length);
@@ -711,38 +721,31 @@ nodata:
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
-struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
const struct sctp_sndrcvinfo *sinfo,
- int data_len, __u8 flags, __u16 ssn,
- gfp_t gfp)
+ int len, __u8 flags, gfp_t gfp)
{
struct sctp_chunk *retval;
struct sctp_datahdr dp;
- int chunk_len;
/* We assign the TSN as LATE as possible, not here when
* creating the chunk.
*/
- dp.tsn = 0;
+ memset(&dp, 0, sizeof(dp));
+ dp.ppid = sinfo->sinfo_ppid;
dp.stream = htons(sinfo->sinfo_stream);
- dp.ppid = sinfo->sinfo_ppid;
/* Set the flags for an unordered send. */
- if (sinfo->sinfo_flags & SCTP_UNORDERED) {
+ if (sinfo->sinfo_flags & SCTP_UNORDERED)
flags |= SCTP_DATA_UNORDERED;
- dp.ssn = 0;
- } else
- dp.ssn = htons(ssn);
- chunk_len = sizeof(dp) + data_len;
- retval = sctp_make_data(asoc, flags, chunk_len, gfp);
+ retval = sctp_make_data(asoc, flags, sizeof(dp) + len, gfp);
if (!retval)
- goto nodata;
+ return NULL;
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
-nodata:
return retval;
}
@@ -1415,6 +1418,12 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp);
}
+struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc,
+ __u8 flags, int paylen, gfp_t gfp)
+{
+ return _sctp_make_chunk(asoc, SCTP_CID_I_DATA, flags, paylen, gfp);
+}
+
static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
__u8 type, __u8 flags, int paylen,
gfp_t gfp)
@@ -2032,6 +2041,10 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
if (net->sctp.addip_enable)
asoc->peer.asconf_capable = 1;
break;
+ case SCTP_CID_I_DATA:
+ if (sctp_sk(asoc->base.sk)->strm_interleave)
+ asoc->intl_enable = 1;
+ break;
default:
break;
}
@@ -3523,6 +3536,30 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
return retval;
}
+struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc,
+ __u32 new_cum_tsn, size_t nstreams,
+ struct sctp_ifwdtsn_skip *skiplist)
+{
+ struct sctp_chunk *retval = NULL;
+ struct sctp_ifwdtsn_hdr ftsn_hdr;
+ size_t hint;
+
+ hint = (nstreams + 1) * sizeof(__u32);
+
+ retval = sctp_make_control(asoc, SCTP_CID_I_FWD_TSN, 0, hint,
+ GFP_ATOMIC);
+ if (!retval)
+ return NULL;
+
+ ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
+ retval->subh.ifwdtsn_hdr =
+ sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
+
+ sctp_addto_chunk(retval, nstreams * sizeof(skiplist[0]), skiplist);
+
+ return retval;
+}
+
/* RE-CONFIG 3.1 (RE-CONFIG chunk)
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index df94d77401e7..16ddf2ca1438 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -632,7 +632,7 @@ static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
struct sctp_chunk *abort;
/* Cancel any partial delivery in progress. */
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->abort_pd(&asoc->ulpq, GFP_ATOMIC);
if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
@@ -972,7 +972,7 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
if (!ev)
return;
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
switch (err_hdr->cause) {
case SCTP_ERROR_UNKNOWN_CHUNK:
@@ -1007,18 +1007,6 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
}
}
-/* Process variable FWDTSN chunk information. */
-static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
- struct sctp_chunk *chunk)
-{
- struct sctp_fwdtsn_skip *skip;
-
- /* Walk through all the skipped SSNs */
- sctp_walk_fwdtsn(skip, chunk) {
- sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
- }
-}
-
/* Helper function to remove the association non-primary peer
* transports.
*/
@@ -1058,7 +1046,7 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
asoc->c.sinit_max_instreams,
NULL, GFP_ATOMIC);
if (ev)
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
/* Helper function to generate an adaptation indication event */
@@ -1070,7 +1058,7 @@ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
if (ev)
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
@@ -1368,18 +1356,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
break;
case SCTP_CMD_REPORT_FWDTSN:
- /* Move the Cumulattive TSN Ack ahead. */
- sctp_tsnmap_skip(&asoc->peer.tsn_map, cmd->obj.u32);
-
- /* purge the fragmentation queue */
- sctp_ulpq_reasm_flushtsn(&asoc->ulpq, cmd->obj.u32);
-
- /* Abort any in progress partial delivery. */
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->report_ftsn(&asoc->ulpq, cmd->obj.u32);
break;
case SCTP_CMD_PROCESS_FWDTSN:
- sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk);
+ asoc->stream.si->handle_ftsn(&asoc->ulpq,
+ cmd->obj.chunk);
break;
case SCTP_CMD_GEN_SACK:
@@ -1483,8 +1465,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n",
__func__, cmd->obj.chunk, &asoc->ulpq);
- sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk,
- GFP_ATOMIC);
+ asoc->stream.si->ulpevent_data(&asoc->ulpq,
+ cmd->obj.chunk,
+ GFP_ATOMIC);
break;
case SCTP_CMD_EVENT_ULP:
@@ -1492,7 +1475,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n",
__func__, cmd->obj.ulpevent, &asoc->ulpq);
- sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent);
+ asoc->stream.si->enqueue_event(&asoc->ulpq,
+ cmd->obj.ulpevent);
break;
case SCTP_CMD_REPLY:
@@ -1729,12 +1713,13 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
break;
case SCTP_CMD_PART_DELIVER:
- sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->start_pd(&asoc->ulpq, GFP_ATOMIC);
break;
case SCTP_CMD_RENEGE:
- sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk,
- GFP_ATOMIC);
+ asoc->stream.si->renege_events(&asoc->ulpq,
+ cmd->obj.chunk,
+ GFP_ATOMIC);
break;
case SCTP_CMD_SETUP_T4:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8f8ccded13e4..541f34735346 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3013,7 +3013,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3034,7 +3034,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr,
- sizeof(struct sctp_datahdr));
+ sctp_datahdr_len(&asoc->stream));
default:
BUG();
}
@@ -3133,7 +3133,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4(
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3150,7 +3150,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4(
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr,
- sizeof(struct sctp_datahdr));
+ sctp_datahdr_len(&asoc->stream));
default:
BUG();
}
@@ -3957,7 +3957,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
{
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_chunk *chunk = arg;
- struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -3971,7 +3970,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the FORWARD_TSN chunk has valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3990,14 +3989,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
goto discard_noforce;
- /* Silently discard the chunk if stream-id is not valid */
- sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->stream.incnt)
- goto discard_noforce;
- }
+ if (!asoc->stream.si->validate_ftsn(chunk))
+ goto discard_noforce;
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
- if (len > sizeof(struct sctp_fwdtsn_hdr))
+ if (len > sctp_ftsnhdr_len(&asoc->stream))
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN,
SCTP_CHUNK(chunk));
@@ -4028,7 +4024,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
{
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_chunk *chunk = arg;
- struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -4042,7 +4037,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the FORWARD_TSN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -4061,14 +4056,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
goto gen_shutdown;
- /* Silently discard the chunk if stream-id is not valid */
- sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->stream.incnt)
- goto gen_shutdown;
- }
+ if (!asoc->stream.si->validate_ftsn(chunk))
+ goto gen_shutdown;
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
- if (len > sizeof(struct sctp_fwdtsn_hdr))
+ if (len > sctp_ftsnhdr_len(&asoc->stream))
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN,
SCTP_CHUNK(chunk));
@@ -6244,14 +6236,12 @@ static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *err;
enum sctp_verb deliver;
size_t datalen;
- u8 ordered = 0;
- u16 ssn, sid;
__u32 tsn;
int tmp;
data_hdr = (struct sctp_datahdr *)chunk->skb->data;
chunk->subh.data_hdr = data_hdr;
- skb_pull(chunk->skb, sizeof(*data_hdr));
+ skb_pull(chunk->skb, sctp_datahdr_len(&asoc->stream));
tsn = ntohl(data_hdr->tsn);
pr_debug("%s: TSN 0x%x\n", __func__, tsn);
@@ -6299,7 +6289,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* Actually, allow a little bit of overflow (up to a MTU).
*/
datalen = ntohs(chunk->chunk_hdr->length);
- datalen -= sizeof(struct sctp_data_chunk);
+ datalen -= sctp_datachk_len(&asoc->stream);
deliver = SCTP_CMD_CHUNK_ULP;
@@ -6394,7 +6384,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
if (chunk->asoc)
chunk->asoc->stats.iodchunks++;
- ordered = 1;
}
/* RFC 2960 6.5 Stream Identifier and Stream Sequence Number
@@ -6405,8 +6394,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* with cause set to "Invalid Stream Identifier" (See Section 3.3.10)
* and discard the DATA chunk.
*/
- sid = ntohs(data_hdr->stream);
- if (sid >= asoc->stream.incnt) {
+ if (ntohs(data_hdr->stream) >= asoc->stream.incnt) {
/* Mark tsn as received even though we drop it */
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
@@ -6427,8 +6415,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* SSN is smaller then the next expected one. If it is, it wrapped
* and is invalid.
*/
- ssn = ntohs(data_hdr->ssn);
- if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid)))
+ if (!asoc->stream.si->validate_data(chunk))
return SCTP_IERROR_PROTO_VIOLATION;
/* Send the data up to the user. Note: Schedule the
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 79b6bee5b768..691d9dc620e3 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -985,11 +985,14 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
if (state > SCTP_STATE_MAX)
return &bug;
+ if (cid == SCTP_CID_I_DATA)
+ cid = SCTP_CID_DATA;
+
if (cid <= SCTP_CID_BASE_MAX)
return &chunk_event_table[cid][state];
if (net->sctp.prsctp_enable) {
- if (cid == SCTP_CID_FWD_TSN)
+ if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN)
return &prsctp_chunk_event_table[0][state];
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3253f724a995..5e4100df7bae 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -201,6 +201,22 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
cb(chunk);
}
+static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk,
+ void (*cb)(struct sk_buff *, struct sock *))
+
+{
+ struct sk_buff *skb, *tmp;
+
+ sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp)
+ cb(skb, sk);
+
+ sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp)
+ cb(skb, sk);
+
+ sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp)
+ cb(skb, sk);
+}
+
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
@@ -1554,6 +1570,7 @@ static void sctp_close(struct sock *sk, long timeout)
if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
!skb_queue_empty(&asoc->ulpq.reasm) ||
+ !skb_queue_empty(&asoc->ulpq.reasm_uo) ||
(sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
struct sctp_chunk *chunk;
@@ -2002,7 +2019,20 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
if (err < 0)
goto out_free;
- wait_connect = true;
+ /* If stream interleave is enabled, wait_connect has to be
+ * done earlier than data enqueue, as it needs to make data
+ * or idata according to asoc->intl_enable which is set
+ * after connection is done.
+ */
+ if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ timeo = sock_sndtimeo(sk, 0);
+ err = sctp_wait_for_connect(asoc, &timeo);
+ if (err)
+ goto out_unlock;
+ } else {
+ wait_connect = true;
+ }
+
pr_debug("%s: we associated primitively\n", __func__);
}
@@ -2281,7 +2311,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (!event)
return -ENOMEM;
- sctp_ulpq_tail_event(&asoc->ulpq, event);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
}
}
@@ -3180,7 +3210,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (val == 0) {
val = asoc->pathmtu - sp->pf->af->net_header_len;
val -= sizeof(struct sctphdr) +
- sizeof(struct sctp_data_chunk);
+ sctp_datachk_len(&asoc->stream);
}
asoc->user_frag = val;
asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
@@ -3350,7 +3380,10 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk,
if (get_user(val, (int __user *)optval))
return -EFAULT;
- sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+ sctp_sk(sk)->frag_interleave = !!val;
+
+ if (!sctp_sk(sk)->frag_interleave)
+ sctp_sk(sk)->strm_interleave = 0;
return 0;
}
@@ -4023,6 +4056,40 @@ out:
return retval;
}
+static int sctp_setsockopt_interleaving_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct net *net = sock_net(sk);
+ struct sctp_assoc_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_id)
+ goto out;
+
+ if (!net->sctp.intl_enable || !sp->frag_interleave) {
+ retval = -EPERM;
+ goto out;
+ }
+
+ sp->strm_interleave = !!params.assoc_value;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4210,6 +4277,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_STREAM_SCHEDULER_VALUE:
retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
break;
+ case SCTP_INTERLEAVING_SUPPORTED:
+ retval = sctp_setsockopt_interleaving_supported(sk, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4680,20 +4751,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
/* use callback to avoid exporting the core structure */
-int sctp_transport_walk_start(struct rhashtable_iter *iter)
+void sctp_transport_walk_start(struct rhashtable_iter *iter)
{
- int err;
-
rhltable_walk_enter(&sctp_transport_hashtable, iter);
- err = rhashtable_walk_start(iter);
- if (err && err != -EAGAIN) {
- rhashtable_walk_stop(iter);
- rhashtable_walk_exit(iter);
- return err;
- }
-
- return 0;
+ rhashtable_walk_start(iter);
}
void sctp_transport_walk_stop(struct rhashtable_iter *iter)
@@ -4784,12 +4846,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
struct net *net, int *pos, void *p) {
struct rhashtable_iter hti;
struct sctp_transport *tsp;
- int ret;
+ int ret = 0;
again:
- ret = sctp_transport_walk_start(&hti);
- if (ret)
- return ret;
+ sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
@@ -6984,6 +7044,47 @@ out:
return retval;
}
+static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->intl_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->strm_interleave;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -7174,6 +7275,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_scheduler_value(sk, len, optval,
optlen);
break;
+ case SCTP_INTERLEAVING_SUPPORTED:
+ retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -8411,11 +8516,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
}
- sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp)
- sctp_skb_set_owner_r_frag(skb, newsk);
-
- sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp)
- sctp_skb_set_owner_r_frag(skb, newsk);
+ sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
/* Set the type of socket to indicate that it is peeled off from the
* original UDP-style socket or created with the accept() call on a
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 76ea66be0bbe..06b644dd858c 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -167,6 +167,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
sched->init(stream);
in:
+ sctp_stream_interleave_init(stream);
if (!incnt)
goto out;
@@ -215,11 +216,13 @@ void sctp_stream_clear(struct sctp_stream *stream)
{
int i;
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
}
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
@@ -606,10 +609,10 @@ struct sctp_chunk *sctp_process_strreset_outreq(
}
for (i = 0; i < nums; i++)
- stream->in[ntohs(str_p[i])].ssn = 0;
+ stream->in[ntohs(str_p[i])].mid = 0;
} else {
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
}
result = SCTP_STRRESET_PERFORMED;
@@ -753,8 +756,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
* performed.
*/
max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
- sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->report_ftsn(&asoc->ulpq, max_tsn_seen);
/* G1: Compute an appropriate value for the Receiver's Next TSN -- the
* TSN that the peer should use to send the next DATA chunk. The
@@ -783,10 +785,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
/* G5: The next expected and outgoing SSNs MUST be reset to 0 for all
* incoming and outgoing streams.
*/
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
result = SCTP_STRRESET_PERFORMED;
@@ -976,11 +980,15 @@ struct sctp_chunk *sctp_process_strreset_resp(
if (result == SCTP_STRRESET_PERFORMED) {
if (nums) {
- for (i = 0; i < nums; i++)
- stream->out[ntohs(str_p[i])].ssn = 0;
+ for (i = 0; i < nums; i++) {
+ stream->out[ntohs(str_p[i])].mid = 0;
+ stream->out[ntohs(str_p[i])].mid_uo = 0;
+ }
} else {
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
}
flags = SCTP_STREAM_RESET_OUTGOING_SSN;
@@ -1023,8 +1031,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
&asoc->peer.tsn_map);
LIST_HEAD(temp);
- sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->report_ftsn(&asoc->ulpq, mtsn);
sctp_tsnmap_init(&asoc->peer.tsn_map,
SCTP_TSN_MAP_INITIAL,
@@ -1042,10 +1049,12 @@ struct sctp_chunk *sctp_process_strreset_resp(
asoc->ctsn_ack_point = asoc->next_tsn - 1;
asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
}
for (i = 0; i < stream->outcnt; i++)
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
new file mode 100644
index 000000000000..8c7cf8f08711
--- /dev/null
+++ b/net/sctp/stream_interleave.c
@@ -0,0 +1,1334 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#include <net/busy_poll.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/ulpevent.h>
+#include <linux/sctp.h>
+
+static struct sctp_chunk *sctp_make_idatafrag_empty(
+ const struct sctp_association *asoc,
+ const struct sctp_sndrcvinfo *sinfo,
+ int len, __u8 flags, gfp_t gfp)
+{
+ struct sctp_chunk *retval;
+ struct sctp_idatahdr dp;
+
+ memset(&dp, 0, sizeof(dp));
+ dp.stream = htons(sinfo->sinfo_stream);
+
+ if (sinfo->sinfo_flags & SCTP_UNORDERED)
+ flags |= SCTP_DATA_UNORDERED;
+
+ retval = sctp_make_idata(asoc, flags, sizeof(dp) + len, gfp);
+ if (!retval)
+ return NULL;
+
+ retval->subh.idata_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
+ memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+
+ return retval;
+}
+
+static void sctp_chunk_assign_mid(struct sctp_chunk *chunk)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *lchunk;
+ __u32 cfsn = 0;
+ __u16 sid;
+
+ if (chunk->has_mid)
+ return;
+
+ sid = sctp_chunk_stream_no(chunk);
+ stream = &chunk->asoc->stream;
+
+ list_for_each_entry(lchunk, &chunk->msg->chunks, frag_list) {
+ struct sctp_idatahdr *hdr;
+ __u32 mid;
+
+ lchunk->has_mid = 1;
+
+ hdr = lchunk->subh.idata_hdr;
+
+ if (lchunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)
+ hdr->ppid = lchunk->sinfo.sinfo_ppid;
+ else
+ hdr->fsn = htonl(cfsn++);
+
+ if (lchunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
+ mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ?
+ sctp_mid_uo_next(stream, out, sid) :
+ sctp_mid_uo_peek(stream, out, sid);
+ } else {
+ mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ?
+ sctp_mid_next(stream, out, sid) :
+ sctp_mid_peek(stream, out, sid);
+ }
+ hdr->mid = htonl(mid);
+ }
+}
+
+static bool sctp_validate_data(struct sctp_chunk *chunk)
+{
+ const struct sctp_stream *stream;
+ __u16 sid, ssn;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_DATA)
+ return false;
+
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ return true;
+
+ stream = &chunk->asoc->stream;
+ sid = sctp_chunk_stream_no(chunk);
+ ssn = ntohs(chunk->subh.data_hdr->ssn);
+
+ return !SSN_lt(ssn, sctp_ssn_peek(stream, in, sid));
+}
+
+static bool sctp_validate_idata(struct sctp_chunk *chunk)
+{
+ struct sctp_stream *stream;
+ __u32 mid;
+ __u16 sid;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_I_DATA)
+ return false;
+
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ return true;
+
+ stream = &chunk->asoc->stream;
+ sid = sctp_chunk_stream_no(chunk);
+ mid = ntohl(chunk->subh.idata_hdr->mid);
+
+ return !MID_lt(mid, sctp_mid_peek(stream, in, sid));
+}
+
+static void sctp_intl_store_reasm(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *cevent;
+ struct sk_buff *pos;
+
+ pos = skb_peek_tail(&ulpq->reasm);
+ if (!pos) {
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ return;
+ }
+
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ (cevent->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ event->fsn > cevent->fsn))) {
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ return;
+ }
+
+ if ((event->stream == cevent->stream &&
+ MID_lt(cevent->mid, event->mid)) ||
+ event->stream > cevent->stream) {
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ return;
+ }
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream < cevent->stream ||
+ (event->stream == cevent->stream &&
+ MID_lt(event->mid, cevent->mid)))
+ break;
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ event->fsn < cevent->fsn))
+ break;
+ }
+
+ __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event));
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_partial(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sctp_stream_in *sin;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ int is_last = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+
+ if (cevent->stream > event->stream ||
+ cevent->mid != sin->mid)
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ goto out;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = cevent->fsn + 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn++;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ goto out;
+ default:
+ goto out;
+ }
+ }
+
+out:
+ if (!first_frag)
+ return NULL;
+
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn = next_fsn;
+ if (is_last) {
+ retval->msg_flags |= MSG_EOR;
+ sin->pd_mode = 0;
+ }
+ }
+
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_reassembled(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_association *asoc = ulpq->asoc;
+ struct sk_buff *pos, *first_frag = NULL;
+ struct sctp_ulpevent *retval = NULL;
+ struct sk_buff *pd_first = NULL;
+ struct sk_buff *pd_last = NULL;
+ struct sctp_stream_in *sin;
+ __u32 next_fsn = 0;
+ __u32 pd_point = 0;
+ __u32 pd_len = 0;
+ __u32 mid = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+ if (cevent->stream > event->stream)
+ break;
+
+ if (MID_lt(cevent->mid, event->mid))
+ continue;
+ if (MID_lt(event->mid, cevent->mid))
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (cevent->mid == sin->mid) {
+ pd_first = pos;
+ pd_last = pos;
+ pd_len = pos->len;
+ }
+
+ first_frag = pos;
+ next_fsn = 0;
+ mid = cevent->mid;
+ break;
+
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ if (pd_first) {
+ pd_last = pos;
+ pd_len += pos->len;
+ }
+ } else {
+ first_frag = NULL;
+ }
+ break;
+
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn)
+ goto found;
+ else
+ first_frag = NULL;
+ break;
+ }
+ }
+
+ if (!pd_first)
+ goto out;
+
+ pd_point = sctp_sk(asoc->base.sk)->pd_point;
+ if (pd_point && pd_point <= pd_len) {
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm,
+ pd_first, pd_last);
+ if (retval) {
+ sin->fsn = next_fsn;
+ sin->pd_mode = 1;
+ }
+ }
+ goto out;
+
+found:
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm,
+ first_frag, pos);
+ if (retval)
+ retval->msg_flags |= MSG_EOR;
+
+out:
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_reasm(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *retval = NULL;
+ struct sctp_stream_in *sin;
+
+ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
+ event->msg_flags |= MSG_EOR;
+ return event;
+ }
+
+ sctp_intl_store_reasm(ulpq, event);
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+ if (sin->pd_mode && event->mid == sin->mid &&
+ event->fsn == sin->fsn)
+ retval = sctp_intl_retrieve_partial(ulpq, event);
+
+ if (!retval)
+ retval = sctp_intl_retrieve_reassembled(ulpq, event);
+
+ return retval;
+}
+
+static void sctp_intl_store_ordered(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *cevent;
+ struct sk_buff *pos;
+
+ pos = skb_peek_tail(&ulpq->lobby);
+ if (!pos) {
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ return;
+ }
+
+ cevent = (struct sctp_ulpevent *)pos->cb;
+ if (event->stream == cevent->stream &&
+ MID_lt(cevent->mid, event->mid)) {
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ return;
+ }
+
+ if (event->stream > cevent->stream) {
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ return;
+ }
+
+ skb_queue_walk(&ulpq->lobby, pos) {
+ cevent = (struct sctp_ulpevent *)pos->cb;
+
+ if (cevent->stream > event->stream)
+ break;
+
+ if (cevent->stream == event->stream &&
+ MID_lt(event->mid, cevent->mid))
+ break;
+ }
+
+ __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event));
+}
+
+static void sctp_intl_retrieve_ordered(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff_head *event_list;
+ struct sctp_stream *stream;
+ struct sk_buff *pos, *tmp;
+ __u16 sid = event->stream;
+
+ stream = &ulpq->asoc->stream;
+ event_list = (struct sk_buff_head *)sctp_event2skb(event)->prev;
+
+ sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
+ struct sctp_ulpevent *cevent = (struct sctp_ulpevent *)pos->cb;
+
+ if (cevent->stream > sid)
+ break;
+
+ if (cevent->stream < sid)
+ continue;
+
+ if (cevent->mid != sctp_mid_peek(stream, in, sid))
+ break;
+
+ sctp_mid_next(stream, in, sid);
+
+ __skb_unlink(pos, &ulpq->lobby);
+
+ __skb_queue_tail(event_list, pos);
+ }
+}
+
+static struct sctp_ulpevent *sctp_intl_order(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_stream *stream;
+ __u16 sid;
+
+ stream = &ulpq->asoc->stream;
+ sid = event->stream;
+
+ if (event->mid != sctp_mid_peek(stream, in, sid)) {
+ sctp_intl_store_ordered(ulpq, event);
+ return NULL;
+ }
+
+ sctp_mid_next(stream, in, sid);
+
+ sctp_intl_retrieve_ordered(ulpq, event);
+
+ return event;
+}
+
+static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff *skb = sctp_event2skb(event);
+ struct sock *sk = ulpq->asoc->base.sk;
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sk_buff_head *skb_list;
+
+ skb_list = (struct sk_buff_head *)skb->prev;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN &&
+ (sk->sk_shutdown & SEND_SHUTDOWN ||
+ !sctp_ulpevent_is_notification(event)))
+ goto out_free;
+
+ if (!sctp_ulpevent_is_notification(event)) {
+ sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
+ }
+
+ if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
+ goto out_free;
+
+ if (skb_list)
+ skb_queue_splice_tail_init(skb_list,
+ &sk->sk_receive_queue);
+ else
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+
+ if (!sp->data_ready_signalled) {
+ sp->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
+
+ return 1;
+
+out_free:
+ if (skb_list)
+ sctp_queue_purge_ulpevents(skb_list);
+ else
+ sctp_ulpevent_free(event);
+
+ return 0;
+}
+
+static void sctp_intl_store_reasm_uo(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *cevent;
+ struct sk_buff *pos;
+
+ pos = skb_peek_tail(&ulpq->reasm_uo);
+ if (!pos) {
+ __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
+ return;
+ }
+
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ (cevent->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ event->fsn > cevent->fsn))) {
+ __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
+ return;
+ }
+
+ if ((event->stream == cevent->stream &&
+ MID_lt(cevent->mid, event->mid)) ||
+ event->stream > cevent->stream) {
+ __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
+ return;
+ }
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream < cevent->stream ||
+ (event->stream == cevent->stream &&
+ MID_lt(event->mid, cevent->mid)))
+ break;
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ event->fsn < cevent->fsn))
+ break;
+ }
+
+ __skb_queue_before(&ulpq->reasm_uo, pos, sctp_event2skb(event));
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_partial_uo(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sctp_stream_in *sin;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ int is_last = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+ if (cevent->stream > event->stream)
+ break;
+
+ if (MID_lt(cevent->mid, sin->mid_uo))
+ continue;
+ if (MID_lt(sin->mid_uo, cevent->mid))
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ goto out;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn_uo) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = cevent->fsn + 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn++;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn_uo) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ goto out;
+ default:
+ goto out;
+ }
+ }
+
+out:
+ if (!first_frag)
+ return NULL;
+
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm_uo, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn_uo = next_fsn;
+ if (is_last) {
+ retval->msg_flags |= MSG_EOR;
+ sin->pd_mode_uo = 0;
+ }
+ }
+
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_association *asoc = ulpq->asoc;
+ struct sk_buff *pos, *first_frag = NULL;
+ struct sctp_ulpevent *retval = NULL;
+ struct sk_buff *pd_first = NULL;
+ struct sk_buff *pd_last = NULL;
+ struct sctp_stream_in *sin;
+ __u32 next_fsn = 0;
+ __u32 pd_point = 0;
+ __u32 pd_len = 0;
+ __u32 mid = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+ if (cevent->stream > event->stream)
+ break;
+
+ if (MID_lt(cevent->mid, event->mid))
+ continue;
+ if (MID_lt(event->mid, cevent->mid))
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (!sin->pd_mode_uo) {
+ sin->mid_uo = cevent->mid;
+ pd_first = pos;
+ pd_last = pos;
+ pd_len = pos->len;
+ }
+
+ first_frag = pos;
+ next_fsn = 0;
+ mid = cevent->mid;
+ break;
+
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ if (pd_first) {
+ pd_last = pos;
+ pd_len += pos->len;
+ }
+ } else {
+ first_frag = NULL;
+ }
+ break;
+
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn)
+ goto found;
+ else
+ first_frag = NULL;
+ break;
+ }
+ }
+
+ if (!pd_first)
+ goto out;
+
+ pd_point = sctp_sk(asoc->base.sk)->pd_point;
+ if (pd_point && pd_point <= pd_len) {
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm_uo,
+ pd_first, pd_last);
+ if (retval) {
+ sin->fsn_uo = next_fsn;
+ sin->pd_mode_uo = 1;
+ }
+ }
+ goto out;
+
+found:
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm_uo,
+ first_frag, pos);
+ if (retval)
+ retval->msg_flags |= MSG_EOR;
+
+out:
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_reasm_uo(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *retval = NULL;
+ struct sctp_stream_in *sin;
+
+ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
+ event->msg_flags |= MSG_EOR;
+ return event;
+ }
+
+ sctp_intl_store_reasm_uo(ulpq, event);
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+ if (sin->pd_mode_uo && event->mid == sin->mid_uo &&
+ event->fsn == sin->fsn_uo)
+ retval = sctp_intl_retrieve_partial_uo(ulpq, event);
+
+ if (!retval)
+ retval = sctp_intl_retrieve_reassembled_uo(ulpq, event);
+
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq)
+{
+ struct sctp_stream_in *csin, *sin = NULL;
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ __u16 sid = 0;
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ csin = sctp_stream_in(ulpq->asoc, cevent->stream);
+ if (csin->pd_mode_uo)
+ continue;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (first_frag)
+ goto out;
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ sin = csin;
+ sid = cevent->stream;
+ sin->mid_uo = cevent->mid;
+ break;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag)
+ break;
+ if (cevent->stream == sid &&
+ cevent->mid == sin->mid_uo &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ last_frag = pos;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag)
+ goto out;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!first_frag)
+ return NULL;
+
+out:
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm_uo, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn_uo = next_fsn;
+ sin->pd_mode_uo = 1;
+ }
+
+ return retval;
+}
+
+static int sctp_ulpevent_idata(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp)
+{
+ struct sctp_ulpevent *event;
+ struct sk_buff_head temp;
+ int event_eor = 0;
+
+ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
+ if (!event)
+ return -ENOMEM;
+
+ event->mid = ntohl(chunk->subh.idata_hdr->mid);
+ if (event->msg_flags & SCTP_DATA_FIRST_FRAG)
+ event->ppid = chunk->subh.idata_hdr->ppid;
+ else
+ event->fsn = ntohl(chunk->subh.idata_hdr->fsn);
+
+ if (!(event->msg_flags & SCTP_DATA_UNORDERED)) {
+ event = sctp_intl_reasm(ulpq, event);
+ if (event && event->msg_flags & MSG_EOR) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+
+ event = sctp_intl_order(ulpq, event);
+ }
+ } else {
+ event = sctp_intl_reasm_uo(ulpq, event);
+ }
+
+ if (event) {
+ event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
+ sctp_enqueue_event(ulpq, event);
+ }
+
+ return event_eor;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq)
+{
+ struct sctp_stream_in *csin, *sin = NULL;
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ __u16 sid = 0;
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ csin = sctp_stream_in(ulpq->asoc, cevent->stream);
+ if (csin->pd_mode)
+ continue;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (first_frag)
+ goto out;
+ if (cevent->mid == csin->mid) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ sin = csin;
+ sid = cevent->stream;
+ }
+ break;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag)
+ break;
+ if (cevent->stream == sid &&
+ cevent->mid == sin->mid &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ last_frag = pos;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag)
+ goto out;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!first_frag)
+ return NULL;
+
+out:
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn = next_fsn;
+ sin->pd_mode = 1;
+ }
+
+ return retval;
+}
+
+static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
+{
+ struct sctp_ulpevent *event;
+
+ if (!skb_queue_empty(&ulpq->reasm)) {
+ do {
+ event = sctp_intl_retrieve_first(ulpq);
+ if (event)
+ sctp_enqueue_event(ulpq, event);
+ } while (event);
+ }
+
+ if (!skb_queue_empty(&ulpq->reasm_uo)) {
+ do {
+ event = sctp_intl_retrieve_first_uo(ulpq);
+ if (event)
+ sctp_enqueue_event(ulpq, event);
+ } while (event);
+ }
+}
+
+static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
+ gfp_t gfp)
+{
+ struct sctp_association *asoc = ulpq->asoc;
+ __u32 freed = 0;
+ __u16 needed;
+
+ if (chunk) {
+ needed = ntohs(chunk->chunk_hdr->length);
+ needed -= sizeof(struct sctp_idata_chunk);
+ } else {
+ needed = SCTP_DEFAULT_MAXWINDOW;
+ }
+
+ if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
+ freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
+ if (freed < needed)
+ freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm,
+ needed);
+ if (freed < needed)
+ freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm_uo,
+ needed);
+ }
+
+ if (chunk && freed >= needed)
+ if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
+ sctp_intl_start_pd(ulpq, gfp);
+
+ sk_mem_reclaim(asoc->base.sk);
+}
+
+static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
+ __u32 mid, __u16 flags, gfp_t gfp)
+{
+ struct sock *sk = ulpq->asoc->base.sk;
+ struct sctp_ulpevent *ev = NULL;
+
+ if (!sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
+ &sctp_sk(sk)->subscribe))
+ return;
+
+ ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED,
+ sid, mid, flags, gfp);
+ if (ev) {
+ __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
+
+ if (!sctp_sk(sk)->data_ready_signalled) {
+ sctp_sk(sk)->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
+ }
+}
+
+static void sctp_intl_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
+{
+ struct sctp_stream *stream = &ulpq->asoc->stream;
+ struct sctp_ulpevent *cevent, *event = NULL;
+ struct sk_buff_head *lobby = &ulpq->lobby;
+ struct sk_buff *pos, *tmp;
+ struct sk_buff_head temp;
+ __u16 csid;
+ __u32 cmid;
+
+ skb_queue_head_init(&temp);
+ sctp_skb_for_each(pos, lobby, tmp) {
+ cevent = (struct sctp_ulpevent *)pos->cb;
+ csid = cevent->stream;
+ cmid = cevent->mid;
+
+ if (csid > sid)
+ break;
+
+ if (csid < sid)
+ continue;
+
+ if (!MID_lt(cmid, sctp_mid_peek(stream, in, csid)))
+ break;
+
+ __skb_unlink(pos, lobby);
+ if (!event)
+ event = sctp_skb2event(pos);
+
+ __skb_queue_tail(&temp, pos);
+ }
+
+ if (!event && pos != (struct sk_buff *)lobby) {
+ cevent = (struct sctp_ulpevent *)pos->cb;
+ csid = cevent->stream;
+ cmid = cevent->mid;
+
+ if (csid == sid && cmid == sctp_mid_peek(stream, in, csid)) {
+ sctp_mid_next(stream, in, csid);
+ __skb_unlink(pos, lobby);
+ __skb_queue_tail(&temp, pos);
+ event = sctp_skb2event(pos);
+ }
+ }
+
+ if (event) {
+ sctp_intl_retrieve_ordered(ulpq, event);
+ sctp_enqueue_event(ulpq, event);
+ }
+}
+
+static void sctp_intl_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
+{
+ struct sctp_stream *stream = &ulpq->asoc->stream;
+ __u16 sid;
+
+ for (sid = 0; sid < stream->incnt; sid++) {
+ struct sctp_stream_in *sin = &stream->in[sid];
+ __u32 mid;
+
+ if (sin->pd_mode_uo) {
+ sin->pd_mode_uo = 0;
+
+ mid = sin->mid_uo;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1, gfp);
+ }
+
+ if (sin->pd_mode) {
+ sin->pd_mode = 0;
+
+ mid = sin->mid;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0, gfp);
+ sctp_mid_skip(stream, in, sid, mid);
+
+ sctp_intl_reap_ordered(ulpq, sid);
+ }
+ }
+
+ /* intl abort pd happens only when all data needs to be cleaned */
+ sctp_ulpq_flush(ulpq);
+}
+
+static inline int sctp_get_skip_pos(struct sctp_ifwdtsn_skip *skiplist,
+ int nskips, __be16 stream, __u8 flags)
+{
+ int i;
+
+ for (i = 0; i < nskips; i++)
+ if (skiplist[i].stream == stream &&
+ skiplist[i].flags == flags)
+ return i;
+
+ return i;
+}
+
+#define SCTP_FTSN_U_BIT 0x1
+static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
+{
+ struct sctp_ifwdtsn_skip ftsn_skip_arr[10];
+ struct sctp_association *asoc = q->asoc;
+ struct sctp_chunk *ftsn_chunk = NULL;
+ struct list_head *lchunk, *temp;
+ int nskips = 0, skip_pos;
+ struct sctp_chunk *chunk;
+ __u32 tsn;
+
+ if (!asoc->peer.prsctp_capable)
+ return;
+
+ if (TSN_lt(asoc->adv_peer_ack_point, ctsn))
+ asoc->adv_peer_ack_point = ctsn;
+
+ list_for_each_safe(lchunk, temp, &q->abandoned) {
+ chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list);
+ tsn = ntohl(chunk->subh.data_hdr->tsn);
+
+ if (TSN_lte(tsn, ctsn)) {
+ list_del_init(lchunk);
+ sctp_chunk_free(chunk);
+ } else if (TSN_lte(tsn, asoc->adv_peer_ack_point + 1)) {
+ __be16 sid = chunk->subh.idata_hdr->stream;
+ __be32 mid = chunk->subh.idata_hdr->mid;
+ __u8 flags = 0;
+
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ flags |= SCTP_FTSN_U_BIT;
+
+ asoc->adv_peer_ack_point = tsn;
+ skip_pos = sctp_get_skip_pos(&ftsn_skip_arr[0], nskips,
+ sid, flags);
+ ftsn_skip_arr[skip_pos].stream = sid;
+ ftsn_skip_arr[skip_pos].reserved = 0;
+ ftsn_skip_arr[skip_pos].flags = flags;
+ ftsn_skip_arr[skip_pos].mid = mid;
+ if (skip_pos == nskips)
+ nskips++;
+ if (nskips == 10)
+ break;
+ } else {
+ break;
+ }
+ }
+
+ if (asoc->adv_peer_ack_point > ctsn)
+ ftsn_chunk = sctp_make_ifwdtsn(asoc, asoc->adv_peer_ack_point,
+ nskips, &ftsn_skip_arr[0]);
+
+ if (ftsn_chunk) {
+ list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
+ SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
+ }
+}
+
+#define _sctp_walk_ifwdtsn(pos, chunk, end) \
+ for (pos = chunk->subh.ifwdtsn_hdr->skip; \
+ (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++)
+
+#define sctp_walk_ifwdtsn(pos, ch) \
+ _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \
+ sizeof(struct sctp_ifwdtsn_chunk))
+
+static bool sctp_validate_fwdtsn(struct sctp_chunk *chunk)
+{
+ struct sctp_fwdtsn_skip *skip;
+ __u16 incnt;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_FWD_TSN)
+ return false;
+
+ incnt = chunk->asoc->stream.incnt;
+ sctp_walk_fwdtsn(skip, chunk)
+ if (ntohs(skip->stream) >= incnt)
+ return false;
+
+ return true;
+}
+
+static bool sctp_validate_iftsn(struct sctp_chunk *chunk)
+{
+ struct sctp_ifwdtsn_skip *skip;
+ __u16 incnt;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_I_FWD_TSN)
+ return false;
+
+ incnt = chunk->asoc->stream.incnt;
+ sctp_walk_ifwdtsn(skip, chunk)
+ if (ntohs(skip->stream) >= incnt)
+ return false;
+
+ return true;
+}
+
+static void sctp_report_fwdtsn(struct sctp_ulpq *ulpq, __u32 ftsn)
+{
+ /* Move the Cumulattive TSN Ack ahead. */
+ sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn);
+ /* purge the fragmentation queue */
+ sctp_ulpq_reasm_flushtsn(ulpq, ftsn);
+ /* Abort any in progress partial delivery. */
+ sctp_ulpq_abort_pd(ulpq, GFP_ATOMIC);
+}
+
+static void sctp_intl_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 ftsn)
+{
+ struct sk_buff *pos, *tmp;
+
+ skb_queue_walk_safe(&ulpq->reasm, pos, tmp) {
+ struct sctp_ulpevent *event = sctp_skb2event(pos);
+ __u32 tsn = event->tsn;
+
+ if (TSN_lte(tsn, ftsn)) {
+ __skb_unlink(pos, &ulpq->reasm);
+ sctp_ulpevent_free(event);
+ }
+ }
+
+ skb_queue_walk_safe(&ulpq->reasm_uo, pos, tmp) {
+ struct sctp_ulpevent *event = sctp_skb2event(pos);
+ __u32 tsn = event->tsn;
+
+ if (TSN_lte(tsn, ftsn)) {
+ __skb_unlink(pos, &ulpq->reasm_uo);
+ sctp_ulpevent_free(event);
+ }
+ }
+}
+
+static void sctp_report_iftsn(struct sctp_ulpq *ulpq, __u32 ftsn)
+{
+ /* Move the Cumulattive TSN Ack ahead. */
+ sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn);
+ /* purge the fragmentation queue */
+ sctp_intl_reasm_flushtsn(ulpq, ftsn);
+ /* abort only when it's for all data */
+ if (ftsn == sctp_tsnmap_get_max_tsn_seen(&ulpq->asoc->peer.tsn_map))
+ sctp_intl_abort_pd(ulpq, GFP_ATOMIC);
+}
+
+static void sctp_handle_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
+{
+ struct sctp_fwdtsn_skip *skip;
+
+ /* Walk through all the skipped SSNs */
+ sctp_walk_fwdtsn(skip, chunk)
+ sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
+}
+
+static void sctp_intl_skip(struct sctp_ulpq *ulpq, __u16 sid, __u32 mid,
+ __u8 flags)
+{
+ struct sctp_stream_in *sin = sctp_stream_in(ulpq->asoc, sid);
+ struct sctp_stream *stream = &ulpq->asoc->stream;
+
+ if (flags & SCTP_FTSN_U_BIT) {
+ if (sin->pd_mode_uo && MID_lt(sin->mid_uo, mid)) {
+ sin->pd_mode_uo = 0;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1,
+ GFP_ATOMIC);
+ }
+ return;
+ }
+
+ if (MID_lt(mid, sctp_mid_peek(stream, in, sid)))
+ return;
+
+ if (sin->pd_mode) {
+ sin->pd_mode = 0;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x0, GFP_ATOMIC);
+ }
+
+ sctp_mid_skip(stream, in, sid, mid);
+
+ sctp_intl_reap_ordered(ulpq, sid);
+}
+
+static void sctp_handle_iftsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
+{
+ struct sctp_ifwdtsn_skip *skip;
+
+ /* Walk through all the skipped MIDs and abort stream pd if possible */
+ sctp_walk_ifwdtsn(skip, chunk)
+ sctp_intl_skip(ulpq, ntohs(skip->stream),
+ ntohl(skip->mid), skip->flags);
+}
+
+static struct sctp_stream_interleave sctp_stream_interleave_0 = {
+ .data_chunk_len = sizeof(struct sctp_data_chunk),
+ .ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk),
+ /* DATA process functions */
+ .make_datafrag = sctp_make_datafrag_empty,
+ .assign_number = sctp_chunk_assign_ssn,
+ .validate_data = sctp_validate_data,
+ .ulpevent_data = sctp_ulpq_tail_data,
+ .enqueue_event = sctp_ulpq_tail_event,
+ .renege_events = sctp_ulpq_renege,
+ .start_pd = sctp_ulpq_partial_delivery,
+ .abort_pd = sctp_ulpq_abort_pd,
+ /* FORWARD-TSN process functions */
+ .generate_ftsn = sctp_generate_fwdtsn,
+ .validate_ftsn = sctp_validate_fwdtsn,
+ .report_ftsn = sctp_report_fwdtsn,
+ .handle_ftsn = sctp_handle_fwdtsn,
+};
+
+static struct sctp_stream_interleave sctp_stream_interleave_1 = {
+ .data_chunk_len = sizeof(struct sctp_idata_chunk),
+ .ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk),
+ /* I-DATA process functions */
+ .make_datafrag = sctp_make_idatafrag_empty,
+ .assign_number = sctp_chunk_assign_mid,
+ .validate_data = sctp_validate_idata,
+ .ulpevent_data = sctp_ulpevent_idata,
+ .enqueue_event = sctp_enqueue_event,
+ .renege_events = sctp_renege_events,
+ .start_pd = sctp_intl_start_pd,
+ .abort_pd = sctp_intl_abort_pd,
+ /* I-FORWARD-TSN process functions */
+ .generate_ftsn = sctp_generate_iftsn,
+ .validate_ftsn = sctp_validate_iftsn,
+ .report_ftsn = sctp_report_iftsn,
+ .handle_ftsn = sctp_handle_iftsn,
+};
+
+void sctp_stream_interleave_init(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ stream->si = asoc->intl_enable ? &sctp_stream_interleave_1
+ : &sctp_stream_interleave_0;
+}
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index d8c162a4089c..f5fcd425232a 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -242,7 +242,8 @@ int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
{
- if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+ if (!list_is_last(&ch->frag_list, &ch->msg->chunks) &&
+ !q->asoc->intl_enable) {
struct sctp_stream_out *sout;
__u16 sid;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index ef7ca44d6e6a..33ca5b73cdb3 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -289,6 +289,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_sctp_do_auth,
},
{
+ .procname = "intl_enable",
+ .data = &init_net.sctp.intl_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "addr_scope_policy",
.data = &init_net.sctp.scope_policy,
.maxlen = sizeof(int),
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5447228bf1a0..84207ad33e8e 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -443,8 +443,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
goto fail;
/* Pull off the common chunk header and DATA header. */
- skb_pull(skb, sizeof(struct sctp_data_chunk));
- len -= sizeof(struct sctp_data_chunk);
+ skb_pull(skb, sctp_datachk_len(&asoc->stream));
+ len -= sctp_datachk_len(&asoc->stream);
/* Embed the event fields inside the cloned skb. */
event = sctp_skb2event(skb);
@@ -705,8 +705,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
sctp_ulpevent_receive_data(event, asoc);
event->stream = ntohs(chunk->subh.data_hdr->stream);
- event->ssn = ntohs(chunk->subh.data_hdr->ssn);
- event->ppid = chunk->subh.data_hdr->ppid;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
event->flags |= SCTP_UNORDERED;
event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
@@ -732,8 +730,9 @@ fail:
* various events.
*/
struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
- const struct sctp_association *asoc, __u32 indication,
- gfp_t gfp)
+ const struct sctp_association *asoc,
+ __u32 indication, __u32 sid, __u32 seq,
+ __u32 flags, gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_pdapi_event *pd;
@@ -754,7 +753,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
* Currently unused.
*/
pd->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
- pd->pdapi_flags = 0;
+ pd->pdapi_flags = flags;
+ pd->pdapi_stream = sid;
+ pd->pdapi_seq = seq;
/* pdapi_length: 32 bits (unsigned integer)
*
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..97fae53310e0 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -60,6 +60,7 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
ulpq->asoc = asoc;
skb_queue_head_init(&ulpq->reasm);
+ skb_queue_head_init(&ulpq->reasm_uo);
skb_queue_head_init(&ulpq->lobby);
ulpq->pd_mode = 0;
@@ -83,6 +84,10 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
sctp_ulpevent_free(event);
}
+ while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) {
+ event = sctp_skb2event(skb);
+ sctp_ulpevent_free(event);
+ }
}
/* Dispose of a ulpqueue. */
@@ -104,6 +109,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
if (!event)
return -ENOMEM;
+ event->ssn = ntohs(chunk->subh.data_hdr->ssn);
+ event->ppid = chunk->subh.data_hdr->ppid;
+
/* Do reassembly if needed. */
event = sctp_ulpq_reasm(ulpq, event);
@@ -328,9 +336,10 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
* payload was fragmented on the way and ip had to reassemble them.
* We add the rest of skb's to the first skb's fraglist.
*/
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
- struct sk_buff_head *queue, struct sk_buff *f_frag,
- struct sk_buff *l_frag)
+struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
+ struct sk_buff_head *queue,
+ struct sk_buff *f_frag,
+ struct sk_buff *l_frag)
{
struct sk_buff *pos;
struct sk_buff *new = NULL;
@@ -853,7 +862,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
struct sctp_stream *stream;
/* Check if this message needs ordering. */
- if (SCTP_DATA_UNORDERED & event->msg_flags)
+ if (event->msg_flags & SCTP_DATA_UNORDERED)
return event;
/* Note: The stream ID must be verified before this routine. */
@@ -974,8 +983,8 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
sctp_ulpq_reap_ordered(ulpq, sid);
}
-static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
- struct sk_buff_head *list, __u16 needed)
+__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list,
+ __u16 needed)
{
__u16 freed = 0;
__u32 tsn, last_tsn;
@@ -1140,7 +1149,7 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
&sctp_sk(sk)->subscribe))
ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
SCTP_PARTIAL_DELIVERY_ABORTED,
- gfp);
+ 0, 0, 0, gfp);
if (ev)
__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6451c5013e06..daf8075f5a4c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -520,7 +520,7 @@ decline_rdma:
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (rc < 0)
goto out_err;
}
goto out_connected;
@@ -751,14 +751,16 @@ static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
struct smc_sock *lsmc = new_smc->listen_smc;
struct smc_clc_msg_accept_confirm cclc;
int local_contact = SMC_REUSE_CONTACT;
struct sock *newsmcsk = &new_smc->sk;
- struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
+ u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
int rc = 0, len;
@@ -775,7 +777,7 @@ static void smc_listen_work(struct work_struct *work)
/* do inband token exchange -
*wait for and receive SMC Proposal CLC message
*/
- reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
+ reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
SMC_CLC_PROPOSAL);
if (reason_code < 0)
goto out_err;
@@ -804,8 +806,11 @@ static void smc_listen_work(struct work_struct *work)
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- if ((pclc.outgoing_subnet != subnet) ||
- (pclc.prefix_len != prefix_len)) {
+
+ pclc = (struct smc_clc_msg_proposal *)&buf;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (pclc_prfx->outgoing_subnet != subnet ||
+ pclc_prfx->prefix_len != prefix_len) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
@@ -816,7 +821,7 @@ static void smc_listen_work(struct work_struct *work)
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc.lcl, 0);
+ smcibdev, ibport, &pclc->lcl, 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -879,11 +884,9 @@ static void smc_listen_work(struct work_struct *work)
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
- if (reason_code < 0) {
+ if (reason_code < 0)
/* peer is not aware of a problem */
- rc = reason_code;
goto out_err_unlock;
- }
if (reason_code > 0)
goto decline_rdma_unlock;
}
@@ -916,8 +919,7 @@ decline_rdma:
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (smc_clc_send_decline(new_smc, reason_code) < 0)
goto out_err;
}
goto out_connected;
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 87f7bede6eab..d4155ff6acde 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -213,6 +213,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
/* guarantee 0 <= bytes_to_rcv <= rmbe_size */
smp_mb__after_atomic();
smc->sk.sk_data_ready(&smc->sk);
+ } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
+ (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) {
+ smc->sk.sk_data_ready(&smc->sk);
}
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
@@ -234,15 +237,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
/* trigger socket release if connection closed */
smc_close_wake_tx_prepared(smc);
}
-
- /* socket connected but not accepted */
- if (!smc->sk.sk_socket)
- return;
-
- /* data available */
- if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
- (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req))
- smc_tx_consumer_update(conn);
}
/* called under tasklet context */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 1800e16b2a02..abf7ceb6690b 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -22,6 +22,54 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* check if received message has a correct header length and contains valid
+ * heading and trailing eyecatchers
+ */
+static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
+{
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
+ struct smc_clc_msg_accept_confirm *clc;
+ struct smc_clc_msg_proposal *pclc;
+ struct smc_clc_msg_decline *dclc;
+ struct smc_clc_msg_trail *trl;
+
+ if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ return false;
+ switch (clcm->type) {
+ case SMC_CLC_PROPOSAL:
+ pclc = (struct smc_clc_msg_proposal *)clcm;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (ntohs(pclc->hdr.length) !=
+ sizeof(*pclc) + ntohs(pclc->iparea_offset) +
+ sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(struct smc_clc_ipv6_prefix) +
+ sizeof(*trl))
+ return false;
+ trl = (struct smc_clc_msg_trail *)
+ ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
+ break;
+ case SMC_CLC_ACCEPT:
+ case SMC_CLC_CONFIRM:
+ clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ if (ntohs(clc->hdr.length) != sizeof(*clc))
+ return false;
+ trl = &clc->trl;
+ break;
+ case SMC_CLC_DECLINE:
+ dclc = (struct smc_clc_msg_decline *)clcm;
+ if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ return false;
+ trl = &dclc->trl;
+ break;
+ default:
+ return false;
+ }
+ if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ return false;
+ return true;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -72,9 +120,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
datlen = ntohs(clcm->length);
if ((len < sizeof(struct smc_clc_msg_hdr)) ||
- (datlen < sizeof(struct smc_clc_msg_decline)) ||
- (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
- memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
+ (datlen > buflen) ||
((clcm->type != SMC_CLC_DECLINE) &&
(clcm->type != expected_type))) {
smc->sk.sk_err = EPROTO;
@@ -89,7 +135,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
krflags = MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
- if (len < datlen) {
+ if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
smc->sk.sk_err = EPROTO;
reason_code = -EPROTO;
goto out;
@@ -133,7 +179,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
smc->sk.sk_err = EPROTO;
if (len < 0)
smc->sk.sk_err = -len;
- return len;
+ return sock_error(&smc->sk);
}
/* send CLC PROPOSAL message across internal TCP socket */
@@ -141,33 +187,43 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_trail trl;
int reason_code = 0;
+ struct kvec vec[3];
struct msghdr msg;
- struct kvec vec;
- int len, rc;
+ int len, plen, rc;
/* send SMC Proposal CLC message */
+ plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
- pclc.hdr.length = htons(sizeof(pclc));
+ pclc.hdr.length = htons(plen);
pclc.hdr.version = SMC_CLC_V1; /* SMC version */
memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
+ pclc.iparea_offset = htons(0);
+ memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
- &pclc.prefix_len);
+ rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ &pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
- memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ pclc_prfx.ipv6_prefixes_cnt = 0;
+ memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec.iov_base = &pclc;
- vec.iov_len = sizeof(pclc);
+ vec[0].iov_base = &pclc;
+ vec[0].iov_len = sizeof(pclc);
+ vec[1].iov_base = &pclc_prfx;
+ vec[1].iov_len = sizeof(pclc_prfx);
+ vec[2].iov_base = &trl;
+ vec[2].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 12a9af1539a2..c145a0f36a68 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
- rsvd : 3;
+ rsvd : 3;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 rsvd : 3,
flag : 1,
@@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
-struct smc_clc_msg_proposal { /* clc proposal message */
- struct smc_clc_msg_hdr hdr;
- struct smc_clc_msg_local lcl;
- __be16 iparea_offset; /* offset to IP address information area */
+struct smc_clc_ipv6_prefix {
+ u8 prefix[4];
+ u8 prefix_len;
+} __packed;
+
+struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
u8 prefix_len; /* number of significant bits in mask */
u8 reserved[2];
u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
- struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
+struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
+ struct smc_clc_msg_hdr hdr;
+ struct smc_clc_msg_local lcl;
+ __be16 iparea_offset; /* offset to IP address information area */
+} __aligned(4);
+
+#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
+ SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ SMC_CLC_PROPOSAL_MAX_PREFIX + \
+ sizeof(struct smc_clc_msg_trail))
+
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
@@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
+/* determine start of the prefix area within the proposal message */
+static inline struct smc_clc_msg_proposal_prefix *
+smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
+{
+ return (struct smc_clc_msg_proposal_prefix *)
+ ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
+}
+
struct smc_sock;
struct smc_ib_device;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 48615d2ac4aa..e194c6cc308a 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -113,7 +113,7 @@ static int smc_close_abort(struct smc_connection *conn)
/* terminate smc socket abnormally - active abort
* RDMA communication no longer possible
*/
-void smc_close_active_abort(struct smc_sock *smc)
+static void smc_close_active_abort(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index ed82506b1b0a..8c498885d758 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -20,7 +20,6 @@
#define SMC_CLOSE_SOCK_PUT_DELAY HZ
void smc_close_wake_tx_prepared(struct smc_sock *smc);
-void smc_close_active_abort(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index cbf58637ee14..9dc392ca06bf 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
sk->sk_shutdown & RCV_SHUTDOWN ||
- sock_flag(sk, SOCK_DONE) ||
atomic_read(&conn->bytes_to_rcv) ||
smc_cdc_rxed_any_close_or_senddone(conn),
&wait);
@@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
if (read_done) {
if (sk->sk_err ||
sk->sk_state == SMC_CLOSED ||
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ sk->sk_shutdown & RCV_SHUTDOWN ||
!timeo ||
signal_pending(current) ||
smc_cdc_rxed_any_close_or_senddone(conn) ||
@@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
peer_conn_abort)
break;
} else {
- if (sock_flag(sk, SOCK_DONE))
- break;
if (sk->sk_err) {
read_done = sock_error(sk);
break;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c48dc2d5fd3a..2e50fddf8ce9 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
if (atomic_read(&conn->sndbuf_space))
break; /* at least 1 byte of free space available */
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- sk->sk_write_pending++;
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
smc_cdc_rxed_any_close_or_senddone(conn) ||
atomic_read(&conn->sndbuf_space),
&wait);
- sk->sk_write_pending--;
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
@@ -450,9 +448,7 @@ static void smc_tx_work(struct work_struct *work)
void smc_tx_consumer_update(struct smc_connection *conn)
{
union smc_host_cursor cfed, cons;
- struct smc_cdc_tx_pend *pend;
- struct smc_wr_buf *wr_buf;
- int to_confirm, rc;
+ int to_confirm;
smc_curs_write(&cons,
smc_curs_read(&conn->local_tx_ctrl.cons, conn),
@@ -466,10 +462,7 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
- if (!rc)
- rc = smc_cdc_msg_send(conn, wr_buf, pend);
- if (rc < 0) {
+ if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 329325bd553e..37892b3909af 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bcast.c: TIPC broadcast code
*
- * Copyright (c) 2004-2006, 2014-2016, Ericsson AB
+ * Copyright (c) 2004-2006, 2014-2017, Ericsson AB
* Copyright (c) 2004, Intel Corporation.
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
@@ -42,8 +42,8 @@
#include "link.h"
#include "name_table.h"
-#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
-#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
+#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
+#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
@@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net)
return tipc_net(net)->bcbase;
}
+/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link
+ * Note: the MTU is decremented to give room for a tunnel header, in
+ * case the message needs to be sent as replicast
+ */
int tipc_bcast_get_mtu(struct net *net)
{
return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
@@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net)
spin_lock_init(&tipc_net(net)->bclock);
if (!tipc_link_bc_create(net, 0, 0,
- U16_MAX,
+ FB_MTU,
BCLINK_WIN_DEFAULT,
0,
&bb->inputq,
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6bce0b1117bd..2d6b2aed30e0 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -483,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
/**
* tipc_link_bc_create - create new link to be used for broadcast
* @n: pointer to associated node
- * @mtu: mtu to be used
+ * @mtu: mtu to be used initially if no peers
* @window: send window to be used
* @inputq: queue to put messages ready for delivery
* @namedq: queue to put binding table update messages ready for delivery
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b0d07b35909d..55d8ba92291d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -251,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff **_skb)
* @pktmax: Max packet size that can be used
* @list: Buffer or chain of buffers to be returned to caller
*
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down.
+ *
* Returns message data size or errno: -ENOMEM, -EFAULT
*/
-int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
- int offset, int dsz, int pktmax, struct sk_buff_head *list)
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+ int dsz, int pktmax, struct sk_buff_head *list)
{
int mhsz = msg_hdr_sz(mhdr);
+ struct tipc_msg pkthdr;
int msz = mhsz + dsz;
- int pktno = 1;
- int pktsz;
int pktrem = pktmax;
- int drem = dsz;
- struct tipc_msg pkthdr;
struct sk_buff *skb;
+ int drem = dsz;
+ int pktno = 1;
char *pktpos;
+ int pktsz;
int rc;
msg_set_size(mhdr, msz);
@@ -272,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
/* No fragmentation needed? */
if (likely(msz <= pktmax)) {
skb = tipc_buf_acquire(msz, GFP_KERNEL);
- if (unlikely(!skb))
+
+ /* Fall back to smaller MTU if node local message */
+ if (unlikely(!skb)) {
+ if (pktmax != MAX_MSG_SIZE)
+ return -ENOMEM;
+ rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+ if (rc != dsz)
+ return rc;
+ if (tipc_msg_assemble(list))
+ return dsz;
return -ENOMEM;
+ }
skb_orphan(skb);
__skb_queue_tail(list, skb);
skb_copy_to_linear_data(skb, mhdr, mhsz);
@@ -589,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
return true;
}
+/* tipc_msg_assemble() - assemble chain of fragments into one message
+ */
+bool tipc_msg_assemble(struct sk_buff_head *list)
+{
+ struct sk_buff *skb, *tmp = NULL;
+
+ if (skb_queue_len(list) == 1)
+ return true;
+
+ while ((skb = __skb_dequeue(list))) {
+ skb->next = NULL;
+ if (tipc_buf_append(&tmp, &skb)) {
+ __skb_queue_tail(list, skb);
+ return true;
+ }
+ if (!tmp)
+ break;
+ }
+ __skb_queue_purge(list);
+ __skb_queue_head_init(list);
+ pr_warn("Failed do assemble buffer\n");
+ return false;
+}
+
/* tipc_msg_reassemble() - clone a buffer chain of fragments and
* reassemble the clones into one message
*/
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 3e4384c222f7..b4ba1b4f9ae7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -98,7 +98,7 @@ struct plist;
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-
+#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb {
@@ -943,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
+bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41127d0b925e..0cdf5c2ad881 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2640,9 +2640,7 @@ void tipc_sk_reinit(struct net *net)
rhashtable_walk_enter(&tn->sk_rht, &iter);
do {
- tsk = ERR_PTR(rhashtable_walk_start(&iter));
- if (IS_ERR(tsk))
- goto walk_stop;
+ rhashtable_walk_start(&iter);
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2651,7 +2649,7 @@ void tipc_sk_reinit(struct net *net)
msg_set_orignode(msg, tn->own_addr);
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
-walk_stop:
+
rhashtable_walk_stop(&iter);
} while (tsk == ERR_PTR(-EAGAIN));
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 30e5746085b8..00641b611aed 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -67,7 +67,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
/* We don't yet support UDP encapsulation, TFC padding and ESN. */
if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
- return 0;
+ return -EINVAL;
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
@@ -120,8 +120,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (!x->type_offload || x->encap)
return false;
- if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
- !dst->child->xfrm && x->type->get_mtu) {
+ if ((x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev)) &&
+ !xdst->child->xfrm && x->type->get_mtu) {
mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..ac277b97e0d7 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -231,7 +231,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (xo && (xo->flags & CRYPTO_DONE)) {
crypto_done = true;
- x = xfrm_input_state(skb);
family = XFRM_SPI_SKB_CB(skb)->family;
if (!(xo->status & CRYPTO_SUCCESS)) {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 73ad8c8ef344..23468672a767 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -44,7 +44,7 @@ static int xfrm_skb_check_space(struct sk_buff *skb)
static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
- struct dst_entry *child = dst_clone(skb_dst(skb)->child);
+ struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));
skb_dst_drop(skb);
return child;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..e3a5aca9cdda 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -54,7 +54,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __read_mostly;
static __read_mostly seqcount_t xfrm_policy_hash_generation;
-static void xfrm_init_pmtu(struct dst_entry *dst);
+static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(struct timer_list *t);
@@ -1251,7 +1251,7 @@ EXPORT_SYMBOL(xfrm_policy_delete);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
- struct net *net = xp_net(pol);
+ struct net *net = sock_net(sk);
struct xfrm_policy *old_pol;
#ifdef CONFIG_XFRM_SUB_POLICY
@@ -1538,7 +1538,9 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
*/
static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
- struct xfrm_state **xfrm, int nx,
+ struct xfrm_state **xfrm,
+ struct xfrm_dst **bundle,
+ int nx,
const struct flowi *fl,
struct dst_entry *dst)
{
@@ -1546,8 +1548,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
unsigned long now = jiffies;
struct net_device *dev;
struct xfrm_mode *inner_mode;
- struct dst_entry *dst_prev = NULL;
- struct dst_entry *dst0 = NULL;
+ struct xfrm_dst *xdst_prev = NULL;
+ struct xfrm_dst *xdst0 = NULL;
int i = 0;
int err;
int header_len = 0;
@@ -1573,13 +1575,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto put_states;
}
- if (!dst_prev)
- dst0 = dst1;
+ bundle[i] = xdst;
+ if (!xdst_prev)
+ xdst0 = xdst;
else
/* Ref count is taken during xfrm_alloc_dst()
* No need to do dst_clone() on dst1
*/
- dst_prev->child = dst1;
+ xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
if (xfrm[i]->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(xfrm[i],
@@ -1616,8 +1619,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst1->input = dst_discard;
dst1->output = inner_mode->afinfo->output;
- dst1->next = dst_prev;
- dst_prev = dst1;
+ xdst_prev = xdst;
header_len += xfrm[i]->props.header_len;
if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
@@ -1625,40 +1627,39 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
trailer_len += xfrm[i]->props.trailer_len;
}
- dst_prev->child = dst;
- dst0->path = dst;
+ xfrm_dst_set_child(xdst_prev, dst);
+ xdst0->path = dst;
err = -ENODEV;
dev = dst->dev;
if (!dev)
goto free_dst;
- xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
- xfrm_init_pmtu(dst_prev);
-
- for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
- struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
+ xfrm_init_path(xdst0, dst, nfheader_len);
+ xfrm_init_pmtu(bundle, nx);
- err = xfrm_fill_dst(xdst, dev, fl);
+ for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
+ xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
+ err = xfrm_fill_dst(xdst_prev, dev, fl);
if (err)
goto free_dst;
- dst_prev->header_len = header_len;
- dst_prev->trailer_len = trailer_len;
- header_len -= xdst->u.dst.xfrm->props.header_len;
- trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
+ xdst_prev->u.dst.header_len = header_len;
+ xdst_prev->u.dst.trailer_len = trailer_len;
+ header_len -= xdst_prev->u.dst.xfrm->props.header_len;
+ trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
}
out:
- return dst0;
+ return &xdst0->u.dst;
put_states:
for (; i < nx; i++)
xfrm_state_put(xfrm[i]);
free_dst:
- if (dst0)
- dst_release_immediate(dst0);
- dst0 = ERR_PTR(err);
+ if (xdst0)
+ dst_release_immediate(&xdst0->u.dst);
+ xdst0 = ERR_PTR(err);
goto out;
}
@@ -1800,7 +1801,7 @@ static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
for (i = 0; i < num; i++) {
if (!dst || dst->xfrm != xfrm[i])
return false;
- dst = dst->child;
+ dst = xfrm_dst_child(dst);
}
return xfrm_bundle_ok(xdst);
@@ -1813,6 +1814,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
{
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
struct xfrm_dst *xdst, *old;
struct dst_entry *dst;
int err;
@@ -1840,7 +1842,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
old = xdst;
- dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+ dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
return ERR_CAST(dst);
@@ -1880,8 +1882,8 @@ static void xfrm_policy_queue_process(struct timer_list *t)
xfrm_decode_session(skb, &fl, dst->ops->family);
spin_unlock(&pq->hold_queue.lock);
- dst_hold(dst->path);
- dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
+ dst_hold(xfrm_dst_path(dst));
+ dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
if (IS_ERR(dst))
goto purge_queue;
@@ -1910,8 +1912,8 @@ static void xfrm_policy_queue_process(struct timer_list *t)
skb = __skb_dequeue(&list);
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
- dst_hold(skb_dst(skb)->path);
- dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
+ dst_hold(xfrm_dst_path(skb_dst(skb)));
+ dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
if (IS_ERR(dst)) {
kfree_skb(skb);
continue;
@@ -2012,8 +2014,8 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
dst1->output = xdst_queue_output;
dst_hold(dst);
- dst1->child = dst;
- dst1->path = dst;
+ xfrm_dst_set_child(xdst, dst);
+ xdst->path = dst;
xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
@@ -2576,7 +2578,7 @@ static int stale_bundle(struct dst_entry *dst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+ while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
dst->dev = dev_net(dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
@@ -2600,13 +2602,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
-static void xfrm_init_pmtu(struct dst_entry *dst)
+static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
{
- do {
- struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ while (nr--) {
+ struct xfrm_dst *xdst = bundle[nr];
u32 pmtu, route_mtu_cached;
+ struct dst_entry *dst;
- pmtu = dst_mtu(dst->child);
+ dst = &xdst->u.dst;
+ pmtu = dst_mtu(xfrm_dst_child(dst));
xdst->child_mtu_cached = pmtu;
pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
@@ -2618,7 +2622,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
pmtu = route_mtu_cached;
dst_metric_set(dst, RTAX_MTU, pmtu);
- } while ((dst = dst->next));
+ }
}
/* Check that the bundle accepts the flow and its components are
@@ -2627,19 +2631,20 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
static int xfrm_bundle_ok(struct xfrm_dst *first)
{
+ struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
struct dst_entry *dst = &first->u.dst;
- struct xfrm_dst *last;
+ struct xfrm_dst *xdst;
+ int start_from, nr;
u32 mtu;
- if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
+ if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
(dst->dev && !netif_running(dst->dev)))
return 0;
if (dst->flags & DST_XFRM_QUEUE)
return 1;
- last = NULL;
-
+ start_from = nr = 0;
do {
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -2651,9 +2656,11 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
return 0;
- mtu = dst_mtu(dst->child);
+ bundle[nr++] = xdst;
+
+ mtu = dst_mtu(xfrm_dst_child(dst));
if (xdst->child_mtu_cached != mtu) {
- last = xdst;
+ start_from = nr;
xdst->child_mtu_cached = mtu;
}
@@ -2661,30 +2668,30 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
return 0;
mtu = dst_mtu(xdst->route);
if (xdst->route_mtu_cached != mtu) {
- last = xdst;
+ start_from = nr;
xdst->route_mtu_cached = mtu;
}
- dst = dst->child;
+ dst = xfrm_dst_child(dst);
} while (dst->xfrm);
- if (likely(!last))
+ if (likely(!start_from))
return 1;
- mtu = last->child_mtu_cached;
- for (;;) {
- dst = &last->u.dst;
+ xdst = bundle[start_from - 1];
+ mtu = xdst->child_mtu_cached;
+ while (start_from--) {
+ dst = &xdst->u.dst;
mtu = xfrm_state_mtu(dst->xfrm, mtu);
- if (mtu > last->route_mtu_cached)
- mtu = last->route_mtu_cached;
+ if (mtu > xdst->route_mtu_cached)
+ mtu = xdst->route_mtu_cached;
dst_metric_set(dst, RTAX_MTU, mtu);
-
- if (last == first)
+ if (!start_from)
break;
- last = (struct xfrm_dst *)last->u.dst.next;
- last->child_mtu_cached = mtu;
+ xdst = bundle[start_from - 1];
+ xdst->child_mtu_cached = mtu;
}
return 1;
@@ -2692,22 +2699,20 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
- return dst_metric_advmss(dst->path);
+ return dst_metric_advmss(xfrm_dst_path(dst));
}
static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
- return mtu ? : dst_mtu(dst->path);
+ return mtu ? : dst_mtu(xfrm_dst_path(dst));
}
static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
const void *daddr)
{
- const struct dst_entry *path = dst->path;
-
- for (; dst != path; dst = dst->child) {
+ while (dst->xfrm) {
const struct xfrm_state *xfrm = dst->xfrm;
if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
@@ -2716,6 +2721,8 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
daddr = xfrm->coaddr;
else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
daddr = &xfrm->id.daddr;
+
+ dst = xfrm_dst_child(dst);
}
return daddr;
}
@@ -2724,7 +2731,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct dst_entry *path = dst->path;
+ const struct dst_entry *path = xfrm_dst_path(dst);
if (!skb)
daddr = xfrm_get_dst_nexthop(dst, daddr);
@@ -2733,7 +2740,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
- const struct dst_entry *path = dst->path;
+ const struct dst_entry *path = xfrm_dst_path(dst);
daddr = xfrm_get_dst_nexthop(dst, daddr);
path->ops->confirm_neigh(path, daddr);
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 8b23c5bcf8e8..02501817227b 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -666,7 +666,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
if (unlikely(oseq < replay_esn->oseq)) {
XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
xo->seq.hi = oseq_hi;
-
+ replay_esn->oseq_hi = oseq_hi;
if (replay_esn->oseq_hi == 0) {
replay_esn->oseq--;
replay_esn->oseq_hi--;
@@ -678,7 +678,6 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
}
replay_esn->oseq = oseq;
- replay_esn->oseq_hi = oseq_hi;
if (xfrm_aevent_is_on(net))
x->repl->notify(x, XFRM_REPLAY_UPDATE);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..1b7856be3eeb 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2048,6 +2048,13 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
+ if (!optval && !optlen) {
+ xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
+ xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
+ __sk_dst_reset(sk);
+ return 0;
+ }
+
if (optlen <= 0 || optlen > PAGE_SIZE)
return -EMSGSIZE;
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 370b749f5ee6..f6bbf8f50da3 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -35,12 +35,22 @@ struct geneve_opt {
u8 opt_data[8]; /* hard-coded to 8 byte */
};
+struct erspan_md2 {
+ __be32 timestamp;
+ __be16 sgt;
+ __be16 flags;
+};
+
struct vxlan_metadata {
u32 gbp;
};
struct erspan_metadata {
- __be32 index;
+ union {
+ __be32 index;
+ struct erspan_md2 md2;
+ } u;
+ int version;
};
SEC("gre_set_tunnel")
@@ -81,6 +91,49 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = _htonl(0x11); /* ::11 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+ key.tunnel_label = 0xabcde;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb)
{
@@ -100,7 +153,18 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- md.index = htonl(123);
+ __builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+ md.version = 1;
+ md.u.index = htonl(123);
+#else
+ u8 direction = 1;
+ u16 hwid = 7;
+
+ md.version = 2;
+ md.u.md2.flags = htons((direction << 3) | (hwid << 4));
+#endif
+
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
ERROR(ret);
@@ -113,7 +177,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
SEC("erspan_get_tunnel")
int _erspan_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
+ char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
u32 index;
@@ -131,9 +195,105 @@ int _erspan_get_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- index = bpf_ntohl(md.index);
bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, index);
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+ index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ (ntohs(md.u.md2.flags) >> 3) & 0x1,
+ (ntohs(md.u.md2.flags) >> 4) & 0x3f,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = _htonl(0x11);
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+ md.u.index = htonl(123);
+ md.version = 1;
+#else
+ u8 direction = 0;
+ u16 hwid = 17;
+
+ md.version = 2;
+ md.u.md2.flags = htons((direction << 3) | (hwid << 4));
+#endif
+
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ u32 index;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+ index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ (ntohs(md.u.md2.flags) >> 3) & 0x1,
+ (ntohs(md.u.md2.flags) >> 4) & 0x3f,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
return TC_ACT_OK;
}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
index 3e8232cc04a8..1af412ec6007 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -78,7 +78,8 @@ static int test_foo_bar(void)
if (join_cgroup(FOO))
goto err;
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo");
goto err;
}
@@ -97,7 +98,8 @@ static int test_foo_bar(void)
printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
@@ -114,7 +116,8 @@ static int test_foo_bar(void)
"This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
@@ -128,7 +131,8 @@ static int test_foo_bar(void)
"This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
@@ -161,13 +165,15 @@ static int test_foo_bar(void)
goto err;
}
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo/bar");
goto err;
}
- if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo");
goto err;
@@ -273,27 +279,33 @@ static int test_multiprog(void)
if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
goto err;
- if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog to cg1");
goto err;
}
- if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Unexpected success attaching the same prog to cg1");
goto err;
}
- if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog2 to cg1");
goto err;
}
- if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to cg2");
goto err;
}
- if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, 2)) {
+ if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
log_err("Attaching prog to cg3");
goto err;
}
- if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, 1)) {
+ if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE)) {
log_err("Attaching prog to cg4");
goto err;
}
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 312e1722a39f..ae7f7c38309b 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -33,10 +33,43 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
-function add_erspan_tunnel {
+function add_ip6gretap_tunnel {
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
# in namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
+ ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+ local ::11 remote ::22
+
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip addr add dev $DEV fc80::200/24
+ ip link set dev $DEV up
+}
+
+function add_erspan_tunnel {
+ # in namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir 1 erspan_hwid 3
+ fi
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
@@ -46,6 +79,35 @@ function add_erspan_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
+function add_ip6erspan_tunnel {
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # in namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 2 erspan_dir 1 erspan_hwid 7
+ fi
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
function add_vxlan_tunnel {
# Set static ARP entry here because iptables set-mark works
# on L3 packet, as a result not applying to ARP packets,
@@ -113,18 +175,65 @@ function test_gre {
cleanup
}
+function test_ip6gre {
+ TYPE=ip6gre
+ DEV_NS=ip6gre00
+ DEV=ip6gre11
+ config_device
+ # reuse the ip6gretap function
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 -c 4 ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ ping -c 1 10.1.1.100
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 -c 1 fc80::200
+ cleanup
+}
+
+function test_ip6gretap {
+ TYPE=ip6gretap
+ DEV_NS=ip6gretap00
+ DEV=ip6gretap11
+ config_device
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 -c 4 ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200
+ ping -c 1 10.1.1.100
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 -c 1 fc80::200
+ cleanup
+}
+
function test_erspan {
TYPE=erspan
DEV_NS=erspan00
DEV=erspan11
config_device
- add_erspan_tunnel
+ add_erspan_tunnel $1
attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
ping -c 1 10.1.1.100
ip netns exec at_ns0 ping -c 1 10.1.1.200
cleanup
}
+function test_ip6erspan {
+ TYPE=ip6erspan
+ DEV_NS=ip6erspan00
+ DEV=ip6erspan11
+ config_device
+ add_ip6erspan_tunnel $1
+ attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+ ping6 -c 3 ::11
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ cleanup
+}
+
function test_vxlan {
TYPE=vxlan
DEV_NS=vxlan00
@@ -175,9 +284,12 @@ function cleanup {
ip link del veth1
ip link del ipip11
ip link del gretap11
+ ip link del ip6gre11
+ ip link del ip6gretap11
ip link del vxlan11
ip link del geneve11
ip link del erspan11
+ ip link del ip6erspan11
pkill tcpdump
pkill cat
set -ex
@@ -187,8 +299,16 @@ trap cleanup 0 2 3 6 9
cleanup
echo "Testing GRE tunnel..."
test_gre
+echo "Testing IP6GRE tunnel..."
+test_ip6gre
+echo "Testing IP6GRETAP tunnel..."
+test_ip6gretap
echo "Testing ERSPAN tunnel..."
-test_erspan
+test_erspan v1
+test_erspan v2
+echo "Testing IP6ERSPAN tunnel..."
+test_ip6erspan v1
+test_ip6erspan v2
echo "Testing VXLAN tunnel..."
test_vxlan
echo "Testing GENEVE tunnel..."
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 56e354fcdfc6..928188902901 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -452,7 +452,7 @@ int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
if (dst) {
struct dst_entry *iter;
- for (iter = dst; iter != NULL; iter = iter->child) {
+ for (iter = dst; iter != NULL; iter = xfrm_dst_child(iter)) {
struct xfrm_state *x = iter->xfrm;
if (x && selinux_authorizable_xfrm(x))
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 792af7c3b74f..4a9fb8fb445f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -18,9 +18,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
- sockmap_verdict_prog.o dev_cgroup.o
+ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o
-TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh
+TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
+ test_offload.py
include ../lib.mk
diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c
new file mode 100644
index 000000000000..fec99750d6ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_ret0.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+/* Sample program which should always load for testing control paths. */
+int func()
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 8591c89c0828..fe916d29e166 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -64,11 +64,11 @@ static struct bpf_align_test tests[] = {
.matches = {
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
- {1, "R3=inv2"},
- {2, "R3=inv4"},
- {3, "R3=inv8"},
- {4, "R3=inv16"},
- {5, "R3=inv32"},
+ {1, "R3_w=inv2"},
+ {2, "R3_w=inv4"},
+ {3, "R3_w=inv8"},
+ {4, "R3_w=inv16"},
+ {5, "R3_w=inv32"},
},
},
{
@@ -92,17 +92,17 @@ static struct bpf_align_test tests[] = {
.matches = {
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
- {1, "R3=inv1"},
- {2, "R3=inv2"},
- {3, "R3=inv4"},
- {4, "R3=inv8"},
- {5, "R3=inv16"},
- {6, "R3=inv1"},
- {7, "R4=inv32"},
- {8, "R4=inv16"},
- {9, "R4=inv8"},
- {10, "R4=inv4"},
- {11, "R4=inv2"},
+ {1, "R3_w=inv1"},
+ {2, "R3_w=inv2"},
+ {3, "R3_w=inv4"},
+ {4, "R3_w=inv8"},
+ {5, "R3_w=inv16"},
+ {6, "R3_w=inv1"},
+ {7, "R4_w=inv32"},
+ {8, "R4_w=inv16"},
+ {9, "R4_w=inv8"},
+ {10, "R4_w=inv4"},
+ {11, "R4_w=inv2"},
},
},
{
@@ -121,12 +121,12 @@ static struct bpf_align_test tests[] = {
.matches = {
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
- {1, "R3=inv4"},
- {2, "R3=inv8"},
- {3, "R3=inv10"},
- {4, "R4=inv8"},
- {5, "R4=inv12"},
- {6, "R4=inv14"},
+ {1, "R3_w=inv4"},
+ {2, "R3_w=inv8"},
+ {3, "R3_w=inv10"},
+ {4, "R4_w=inv8"},
+ {5, "R4_w=inv12"},
+ {6, "R4_w=inv14"},
},
},
{
@@ -143,10 +143,10 @@ static struct bpf_align_test tests[] = {
.matches = {
{1, "R1=ctx(id=0,off=0,imm=0)"},
{1, "R10=fp0"},
- {1, "R3=inv7"},
- {2, "R3=inv7"},
- {3, "R3=inv14"},
- {4, "R3=inv56"},
+ {1, "R3_w=inv7"},
+ {2, "R3_w=inv7"},
+ {3, "R3_w=inv14"},
+ {4, "R3_w=inv56"},
},
},
@@ -185,18 +185,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
- {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
{18, "R3=pkt_end(id=0,off=0,imm=0)"},
- {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
- {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+ {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
},
},
{
@@ -217,16 +217,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
},
},
{
@@ -257,14 +257,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
- {5, "R5=pkt(id=0,off=14,r=0,imm=0)"},
- {6, "R4=pkt(id=0,off=14,r=0,imm=0)"},
+ {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+ {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
- {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
- {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
},
},
{
@@ -320,11 +320,11 @@ static struct bpf_align_test tests[] = {
* alignment of 4.
*/
{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
@@ -336,11 +336,11 @@ static struct bpf_align_test tests[] = {
/* Variable offset is added to R5 packet pointer,
* resulting in auxiliary alignment of 4.
*/
- {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -352,18 +352,18 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5=pkt(id=0,off=14,r=8"},
+ {26, "R5_w=pkt(id=0,off=14,r=8"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n).
*/
- {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -410,11 +410,11 @@ static struct bpf_align_test tests[] = {
* alignment of 4.
*/
{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* Packet pointer has (4n+2) offset */
- {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -426,11 +426,11 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -473,11 +473,11 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+ {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
/* ptr & 0x40 == either 0 or 0x40 */
- {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
+ {5, "R5_w=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
/* ptr << 2 == unknown, (4n) */
- {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ {7, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
@@ -485,7 +485,7 @@ static struct bpf_align_test tests[] = {
/* Checked s>=0 */
{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* packet pointer + nonnegative (4n+2) */
- {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
@@ -530,11 +530,11 @@ static struct bpf_align_test tests[] = {
* alignment of 4.
*/
{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* New unknown value in R7 is (4n) */
- {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Subtracting it from R6 blows our unsigned bounds */
{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
/* Checked s>= 0 */
@@ -583,15 +583,15 @@ static struct bpf_align_test tests[] = {
* alignment of 4.
*/
{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
/* Adding 14 makes R6 be (4n+2) */
- {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
/* New unknown value in R7 is (4n), >= 76 */
- {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
new file mode 100755
index 000000000000..3914f7a4585a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -0,0 +1,681 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+#
+# This software is licensed under the GNU General License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import json
+import os
+import pprint
+import subprocess
+import time
+
+logfile = None
+log_level = 1
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+
+def log_get_sec(level=0):
+ return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+ global log_level
+ log_level += add
+
+def log_level_dec(sub=1):
+ global log_level
+ log_level -= sub
+
+def log_level_set(level):
+ global log_level
+ log_level = level
+
+def log(header, data, level=None):
+ """
+ Output to an optional log.
+ """
+ if logfile is None:
+ return
+ if level is not None:
+ log_level_set(level)
+
+ if not isinstance(data, str):
+ data = pp.pformat(data)
+
+ if len(header):
+ logfile.write("\n" + log_get_sec() + " ")
+ logfile.write(header)
+ if len(header) and len(data.strip()):
+ logfile.write("\n")
+ logfile.write(data)
+
+def skip(cond, msg):
+ if not cond:
+ return
+ print("SKIP: " + msg)
+ log("SKIP: " + msg, "", level=1)
+ os.sys.exit(0)
+
+def fail(cond, msg):
+ if not cond:
+ return
+ print("FAIL: " + msg)
+ log("FAIL: " + msg, "", level=1)
+ os.sys.exit(1)
+
+def start_test(msg):
+ log(msg, "", level=1)
+ log_level_inc()
+ print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+ """
+ Run a command in subprocess and return tuple of (retval, stdout);
+ optionally return stderr as well as third value.
+ """
+ proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if background:
+ msg = "%s START: %s" % (log_get_sec(1),
+ datetime.now().strftime("%H:%M:%S.%f"))
+ log("BKG " + proc.args, msg)
+ return proc
+
+ return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+ stdout, stderr = proc.communicate()
+ stdout = stdout.decode("utf-8")
+ stderr = stderr.decode("utf-8")
+ proc.stdout.close()
+ proc.stderr.close()
+
+ stderr = "\n" + stderr
+ if stderr[-1] == "\n":
+ stderr = stderr[:-1]
+
+ sec = log_get_sec(1)
+ log("CMD " + proc.args,
+ "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+ (proc.returncode, sec, stdout, sec, stderr,
+ sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+ if proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+ if include_stderr:
+ return proc.returncode, stdout, stderr
+ else:
+ return proc.returncode, stdout
+
+def rm(f):
+ cmd("rm -f %s" % (f))
+ if f in files:
+ files.remove(f)
+
+def tool(name, args, flags, JSON=True, fail=True):
+ params = ""
+ if JSON:
+ params += "%s " % (flags["json"])
+
+ ret, out = cmd(name + " " + params + args, fail=fail)
+ if JSON and len(out.strip()) != 0:
+ return ret, json.loads(out)
+ else:
+ return ret, out
+
+def bpftool(args, JSON=True, fail=True):
+ return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail)
+
+def bpftool_prog_list(expected=None):
+ _, progs = bpftool("prog show", JSON=True, fail=True)
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs loaded, expected %d" %
+ (len(progs), expected))
+ return progs
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nprogs = len(bpftool_prog_list())
+ if nprogs == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def ip(args, force=False, JSON=True, fail=True):
+ if force:
+ args = "-force " + args
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail)
+
+def tc(args, JSON=True, fail=True):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail)
+
+def ethtool(dev, opt, args, fail=True):
+ return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+ return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+ return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+ return "bytecode \"%s\"" % (bytecode)
+
+class DebugfsDir:
+ """
+ Class for accessing DebugFS directories as a dictionary.
+ """
+
+ def __init__(self, path):
+ self.path = path
+ self._dict = self._debugfs_dir_read(path)
+
+ def __len__(self):
+ return len(self._dict.keys())
+
+ def __getitem__(self, key):
+ if type(key) is int:
+ key = list(self._dict.keys())[key]
+ return self._dict[key]
+
+ def __setitem__(self, key, value):
+ log("DebugFS set %s = %s" % (key, value), "")
+ log_level_inc()
+
+ cmd("echo '%s' > %s/%s" % (value, self.path, key))
+ log_level_dec()
+
+ _, out = cmd('cat %s/%s' % (self.path, key))
+ self._dict[key] = out.strip()
+
+ def _debugfs_dir_read(self, path):
+ dfs = {}
+
+ log("DebugFS state for %s" % (path), "")
+ log_level_inc(add=2)
+
+ _, out = cmd('ls ' + path)
+ for f in out.split():
+ p = os.path.join(path, f)
+ if os.path.isfile(p):
+ _, out = cmd('cat %s/%s' % (path, f))
+ dfs[f] = out.strip()
+ elif os.path.isdir(p):
+ dfs[f] = DebugfsDir(p)
+ else:
+ raise Exception("%s is neither file nor directory" % (p))
+
+ log_level_dec()
+ log("DebugFS state", dfs)
+ log_level_dec()
+
+ return dfs
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self):
+ self.dev = self._netdevsim_create()
+ devs.append(self)
+
+ self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+ self.dfs_refresh()
+
+ def __getitem__(self, key):
+ return self.dev[key]
+
+ def _netdevsim_create(self):
+ _, old = ip("link show")
+ ip("link add sim%d type netdevsim")
+ _, new = ip("link show")
+
+ for dev in new:
+ f = filter(lambda x: x["ifname"] == dev["ifname"], old)
+ if len(list(f)) == 0:
+ return dev
+
+ raise Exception("failed to create netdevsim device")
+
+ def remove(self):
+ devs.remove(self)
+ ip("link del dev %s" % (self.dev["ifname"]))
+
+ def dfs_refresh(self):
+ self.dfs = DebugfsDir(self.dfs_dir)
+ return self.dfs
+
+ def dfs_num_bound_progs(self):
+ path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+ _, progs = cmd('ls %s' % (path))
+ return len(progs.split())
+
+ def dfs_get_bound_progs(self, expected):
+ progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs bound, expected %d" %
+ (len(progs), expected))
+ return progs
+
+ def wait_for_flush(self, bound=0, total=0, n_retry=20):
+ for i in range(n_retry):
+ nbound = self.dfs_num_bound_progs()
+ nprogs = len(bpftool_prog_list())
+ if nbound == bound and nprogs == total:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+ def set_mtu(self, mtu, fail=True):
+ return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+ fail=fail)
+
+ def set_xdp(self, bpf, mode, force=False, fail=True):
+ return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+ force=force, fail=fail)
+
+ def unset_xdp(self, mode, force=False, fail=True):
+ return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+ force=force, fail=fail)
+
+ def ip_link_show(self, xdp):
+ _, link = ip("link show dev %s" % (self['ifname']))
+ if len(link) > 1:
+ raise Exception("Multiple objects on ip link show")
+ if len(link) < 1:
+ return {}
+ fail(xdp != "xdp" in link,
+ "XDP program not reporting in iplink (reported %s, expected %s)" %
+ ("xdp" in link, xdp))
+ return link[0]
+
+ def tc_add_ingress(self):
+ tc("qdisc add dev %s ingress" % (self['ifname']))
+
+ def tc_del_ingress(self):
+ tc("qdisc del dev %s ingress" % (self['ifname']))
+
+ def tc_flush_filters(self, bound=0, total=0):
+ self.tc_del_ingress()
+ self.tc_add_ingress()
+ self.wait_for_flush(bound=bound, total=total)
+
+ def tc_show_ingress(self, expected=None):
+ # No JSON support, oh well...
+ flags = ["skip_sw", "skip_hw", "in_hw"]
+ named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+ args = "-s filter show dev %s ingress" % (self['ifname'])
+ _, out = tc(args, JSON=False)
+
+ filters = []
+ lines = out.split('\n')
+ for line in lines:
+ words = line.split()
+ if "handle" not in words:
+ continue
+ fltr = {}
+ for flag in flags:
+ fltr[flag] = flag in words
+ for name in named:
+ try:
+ idx = words.index(name)
+ fltr[name] = words[idx + 1]
+ except ValueError:
+ pass
+ filters.append(fltr)
+
+ if expected is not None:
+ fail(len(filters) != expected,
+ "%d ingress filters loaded, expected %d" %
+ (len(filters), expected))
+ return filters
+
+ def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False,
+ fail=True):
+ params = ""
+ if da:
+ params += " da"
+ if skip_sw:
+ params += " skip_sw"
+ if skip_hw:
+ params += " skip_hw"
+ return tc("filter add dev %s ingress bpf %s %s" %
+ (self['ifname'], bpf, params), fail=fail)
+
+ def set_ethtool_tc_offloads(self, enable, fail=True):
+ args = "hw-tc-offload %s" % ("on" if enable else "off")
+ return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+ for dev in devs:
+ dev.remove()
+ for f in files:
+ cmd("rm -f %s" % (f))
+
+def pin_prog(file_name, idx=0):
+ progs = bpftool_prog_list(expected=(idx + 1))
+ prog = progs[idx]
+ bpftool("prog pin id %d %s" % (prog["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+ logfile = open(args.log, 'w+')
+ logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+# Check no BPF programs are loaded
+skip(len(progs) != 0, "BPF programs already loaded on the system")
+
+# Check netdevsim
+ret, out = cmd("modprobe netdevsim", fail=False)
+skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+ cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.o"]
+for s in samples:
+ ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+ skip(ret != 0, "sample %s/%s not found, please compile it" %
+ (bpf_test_dir, s))
+
+try:
+ obj = bpf_obj("sample_ret0.o")
+ bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+ start_test("Test destruction of generic XDP...")
+ sim = NetdevSim()
+ sim.set_xdp(obj, "generic")
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.tc_add_ingress()
+
+ start_test("Test TC non-offloaded...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+
+ start_test("Test TC non-offloaded isn't getting bound...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads are off by default...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ sim.wait_for_flush()
+
+ sim.set_ethtool_tc_offloads(True)
+ sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+ start_test("Test TC offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC cBPF bytcode tries offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+ sim.tc_flush_filters()
+ sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+ start_test("Test TC cBPF unbound bytecode doesn't offload...")
+ ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False)
+ fail(ret == 0, "TC bytecode loaded for offload")
+ sim.wait_for_flush()
+
+ start_test("Test TC offloads work...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ fail(ret != 0, "TC filter did not load with TC offloads enabled")
+
+ start_test("Test TC offload basics...")
+ dfs = sim.dfs_get_bound_progs(expected=1)
+ progs = bpftool_prog_list(expected=1)
+ ingress = sim.tc_show_ingress(expected=1)
+
+ dprog = dfs[0]
+ prog = progs[0]
+ fltr = ingress[0]
+ fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+ fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+ fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+ start_test("Test TC offload is device-bound...")
+ fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+ fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+ fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test disabling TC offloads is rejected while filters installed...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+
+ start_test("Test qdisc removal frees things...")
+ sim.tc_flush_filters()
+ sim.tc_show_ingress(expected=0)
+
+ start_test("Test disabling TC offloads is OK without filters...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret != 0,
+ "Driver refused to disable TC offloads without filters installed...")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of TC filters...")
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of XDP...")
+ sim.set_xdp(obj, "offload")
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test XDP prog reporting...")
+ sim.set_xdp(obj, "drv")
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+
+ start_test("Test XDP prog replace without force...")
+ ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ fail(ret == 0, "Replaced XDP program without -force")
+ sim.wait_for_flush(total=1)
+
+ start_test("Test XDP prog replace with force...")
+ ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+ fail(ret != 0, "Could not replace XDP program with -force")
+ bpftool_prog_list_wait(expected=1)
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+
+ start_test("Test XDP prog replace with bad flags...")
+ ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ ret, _ = sim.set_xdp(obj, "", force=True, fail=False)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+
+ start_test("Test XDP prog remove with bad flags...")
+ ret, _ = sim.unset_xdp("offload", force=True, fail=False)
+ fail(ret == 0, "Removed program with a bad mode mode")
+ ret, _ = sim.unset_xdp("", force=True, fail=False)
+ fail(ret == 0, "Removed program with a bad mode mode")
+
+ start_test("Test MTU restrictions...")
+ ret, _ = sim.set_mtu(9000, fail=False)
+ fail(ret == 0,
+ "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+ sim.unset_xdp("drv")
+ bpftool_prog_list_wait(expected=0)
+ sim.set_mtu(9000)
+ ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ sim.set_mtu(1500)
+
+ sim.wait_for_flush()
+ start_test("Test XDP offload...")
+ sim.set_xdp(obj, "offload")
+ ipl = sim.ip_link_show(xdp=True)
+ link_xdp = ipl["xdp"]["prog"]
+ progs = bpftool_prog_list(expected=1)
+ prog = progs[0]
+ fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+
+ start_test("Test XDP offload is device bound...")
+ dfs = sim.dfs_get_bound_progs(expected=1)
+ dprog = dfs[0]
+
+ fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+ fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+ fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test removing XDP program many times...")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("")
+ sim.unset_xdp("")
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test attempt to use a program for a wrong device...")
+ sim2 = NetdevSim()
+ sim2.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+ ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+ fail(ret == 0, "Pinned program loaded for a different device accepted")
+ sim2.remove()
+ ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+ fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ rm(pin_file)
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test mixing of TC and XDP...")
+ sim.tc_add_ingress()
+ sim.set_xdp(obj, "offload")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ fail(ret == 0, "Loading TC when XDP active should fail")
+ sim.unset_xdp("offload")
+ sim.wait_for_flush()
+
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ ret, _ = sim.set_xdp(obj, "offload", fail=False)
+ fail(ret == 0, "Loading XDP when TC active should fail")
+
+ start_test("Test binding TC from pinned...")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+ sim.tc_flush_filters(bound=1, total=1)
+ sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+ sim.tc_flush_filters(bound=1, total=1)
+
+ start_test("Test binding XDP from pinned...")
+ sim.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+
+ start_test("Test offload of wrong type fails...")
+ ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+ fail(ret == 0, "Managed to attach XDP program to TC")
+
+ start_test("Test asking for TC offload of two filters...")
+ sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+ # The above will trigger a splat until TC cls_bpf drivers are fixed
+
+ sim.tc_flush_filters(bound=2, total=2)
+
+ start_test("Test if netdev removal waits for translation...")
+ delay_msec = 500
+ sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+ start = time.time()
+ cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+ (sim['ifname'], obj)
+ tc_proc = cmd(cmd_line, background=True, fail=False)
+ # Wait for the verifier to start
+ while sim.dfs_num_bound_progs() <= 2:
+ pass
+ sim.remove()
+ end = time.time()
+ ret, _ = cmd_result(tc_proc, fail=False)
+ time_diff = end - start
+ log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+ fail(ret == 0, "Managed to load TC filter on a unregistering device")
+ delay_sec = delay_msec * 0.001
+ fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+ (time_diff, delay_sec))
+
+ print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+ log("Clean up...", "", level=1)
+ log_level_inc()
+ clean_up()