-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--Documentation/admin-guide/sysctl/net.rst19
-rw-r--r--Documentation/bpf/kfuncs.rst15
-rw-r--r--Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa-port.yaml17
-rw-r--r--Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml145
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt128
-rw-r--r--Documentation/devicetree/bindings/net/nxp,tja11xx.yaml17
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar803x.yaml8
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml18
-rw-r--r--Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml2
-rw-r--r--Documentation/networking/decnet.rst243
-rw-r--r--Documentation/networking/devlink/index.rst6
-rw-r--r--Documentation/networking/index.rst1
-rw-r--r--Documentation/networking/phy.rst9
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/userspace-api/netlink/index.rst12
-rw-r--r--Documentation/userspace-api/netlink/intro.rst643
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/mips/configs/decstation_64_defconfig2
-rw-r--r--arch/mips/configs/decstation_defconfig2
-rw-r--r--arch/mips/configs/decstation_r4k_defconfig2
-rw-r--r--arch/mips/configs/gpr_defconfig2
-rw-r--r--arch/mips/configs/mtx1_defconfig2
-rw-r--r--arch/mips/configs/rm200_defconfig2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig2
-rw-r--r--drivers/isdn/capi/kcapi.c4
-rw-r--r--drivers/net/amt.c6
-rw-r--r--drivers/net/dsa/bcm_sf2.c130
-rw-r--r--drivers/net/dsa/ocelot/felix.c6
-rw-r--r--drivers/net/ethernet/altera/altera_tse_ethtool.c1
-rw-r--r--drivers/net/ethernet/engleder/tsnep.h1
-rw-r--r--drivers/net/ethernet/engleder/tsnep_hw.h3
-rw-r--r--drivers/net/ethernet/engleder/tsnep_main.c115
-rw-r--r--drivers/net/ethernet/faraday/ftmac100.c1
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c11
-rw-r--r--drivers/net/ethernet/freescale/fman/fman.c31
-rw-r--r--drivers/net/ethernet/freescale/fman/fman.h31
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c141
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.h35
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_keygen.c29
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_keygen.h29
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_mac.h10
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c47
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.h35
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_muram.c31
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_muram.h32
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_port.c29
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_port.h29
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_sp.c29
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_sp.h28
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.c45
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.h35
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c537
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.h41
-rw-r--r--drivers/net/ethernet/freescale/xgmac_mdio.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c20
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c67
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c752
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c166
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c128
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c56
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera.h2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_hw.c19
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_hw.h2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c41
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_switchdev.c8
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h158
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c188
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c111
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c141
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c437
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c77
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c495
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c108
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_linecards.c96
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.c87
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c373
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h92
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c5
-rw-r--r--drivers/net/ethernet/microchip/lan966x/Kconfig1
-rw-r--r--drivers/net/ethernet/microchip/lan966x/Makefile2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c155
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_lag.c363
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c104
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h39
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c3
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_port.c22
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_regs.h51
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c138
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c83
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c9
-rw-r--r--drivers/net/ethernet/realtek/r8169.h12
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c124
-rw-r--r--drivers/net/ethernet/realtek/r8169_phy_config.c130
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c1
-rw-r--r--drivers/net/ethernet/ti/Kconfig1
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c44
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.h2
-rw-r--r--drivers/net/ethernet/ti/davinci_mdio.c242
-rw-r--r--drivers/net/ethernet/vertexcom/mse102x.c2
-rw-r--r--drivers/net/geneve.c9
-rw-r--r--drivers/net/netdevsim/dev.c15
-rw-r--r--drivers/net/phy/at803x.c25
-rw-r--r--drivers/net/phy/broadcom.c39
-rw-r--r--drivers/net/phy/nxp-tja11xx.c83
-rw-r--r--drivers/net/phy/phy-core.c52
-rw-r--r--drivers/net/phy/phylink.c3
-rw-r--r--drivers/net/phy/realtek.c44
-rw-r--r--drivers/net/phy/sfp.c121
-rw-r--r--drivers/net/vxlan/vxlan_core.c9
-rw-r--r--drivers/of/base.c1
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/brcmphy.h1
-rw-r--r--include/linux/btf.h2
-rw-r--r--include/linux/compiler_attributes.h7
-rw-r--r--include/linux/netdevice.h8
-rw-r--r--include/linux/netfilter.h5
-rw-r--r--include/linux/netfilter_defs.h8
-rw-r--r--include/linux/phy.h6
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/devlink.h19
-rw-r--r--include/net/dn.h231
-rw-r--r--include/net/dn_dev.h200
-rw-r--r--include/net/dn_fib.h169
-rw-r--r--include/net/dn_neigh.h32
-rw-r--r--include/net/dn_nsp.h201
-rw-r--r--include/net/dn_route.h118
-rw-r--r--include/net/dsa.h4
-rw-r--r--include/net/gro.h33
-rw-r--r--include/net/inet_connection_sock.h3
-rw-r--r--include/net/inet_hashtables.h80
-rw-r--r--include/net/macsec.h1
-rw-r--r--include/net/netfilter/nf_conntrack_core.h6
-rw-r--r--include/net/netlink.h4
-rw-r--r--include/net/netns/netfilter.h3
-rw-r--r--include/net/pkt_sched.h2
-rw-r--r--include/net/sch_generic.h1
-rw-r--r--include/net/sock.h14
-rw-r--r--include/soc/mscc/ocelot.h2
-rw-r--r--include/uapi/linux/bpf.h33
-rw-r--r--include/uapi/linux/dn.h149
-rw-r--r--include/uapi/linux/if_macsec.h2
-rw-r--r--include/uapi/linux/netfilter_decnet.h72
-rw-r--r--include/uapi/linux/netlink.h23
-rw-r--r--kernel/bpf/bpf_iter.c5
-rw-r--r--kernel/bpf/bpf_local_storage.c6
-rw-r--r--kernel/bpf/btf.c18
-rw-r--r--kernel/bpf/core.c1
-rw-r--r--kernel/bpf/cpumap.c6
-rw-r--r--kernel/bpf/devmap.c6
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c32
-rw-r--r--kernel/bpf/local_storage.c5
-rw-r--r--kernel/bpf/lpm_trie.c4
-rw-r--r--kernel/bpf/offload.c6
-rw-r--r--kernel/bpf/queue_stack_maps.c2
-rw-r--r--kernel/bpf/ringbuf.c10
-rw-r--r--kernel/bpf/verifier.c159
-rw-r--r--kernel/cgroup/cgroup.c5
-rw-r--r--net/8021q/vlan_core.c9
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/Kconfig2
-rw-r--r--net/Makefile1
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/bpf/test_run.c5
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_if.c20
-rw-r--r--net/bridge/br_sysfs_if.c4
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/caif_usb.c2
-rw-r--r--net/caif/cfcnfg.c4
-rw-r--r--net/caif/cfctrl.c2
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/devlink.c139
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/netpoll.c4
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/sock_map.c12
-rw-r--r--net/dccp/ipv4.c25
-rw-r--r--net/dccp/ipv6.c18
-rw-r--r--net/dccp/proto.c34
-rw-r--r--net/decnet/Kconfig43
-rw-r--r--net/decnet/Makefile10
-rw-r--r--net/decnet/README8
-rw-r--r--net/decnet/af_decnet.c2404
-rw-r--r--net/decnet/dn_dev.c1433
-rw-r--r--net/decnet/dn_fib.c798
-rw-r--r--net/decnet/dn_neigh.c607
-rw-r--r--net/decnet/dn_nsp_in.c907
-rw-r--r--net/decnet/dn_nsp_out.c696
-rw-r--r--net/decnet/dn_route.c1922
-rw-r--r--net/decnet/dn_rules.c253
-rw-r--r--net/decnet/dn_table.c929
-rw-r--r--net/decnet/dn_timer.c104
-rw-r--r--net/decnet/netfilter/Kconfig17
-rw-r--r--net/decnet/netfilter/Makefile6
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c158
-rw-r--r--net/decnet/sysctl_net_decnet.c362
-rw-r--r--net/dsa/dsa2.c92
-rw-r--r--net/dsa/dsa_priv.h5
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/port.c190
-rw-r--r--net/dsa/slave.c125
-rw-r--r--net/dsa/tag_8021q.c4
-rw-r--r--net/ethernet/eth.c9
-rw-r--r--net/ethtool/ioctl.c8
-rw-r--r--net/ipv4/af_inet.c35
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/fou.c9
-rw-r--r--net/ipv4/gre_offload.c9
-rw-r--r--net/ipv4/inet_connection_sock.c275
-rw-r--r--net/ipv4/inet_hashtables.c268
-rw-r--r--net/ipv4/tcp.c25
-rw-r--r--net/ipv4/tcp_fastopen.c3
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_offload.c9
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_offload.c9
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tcp_ipv6.c17
-rw-r--r--net/l2tp/l2tp_eth.c4
-rw-r--r--net/netfilter/core.c10
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nfnetlink_hook.c7
-rw-r--r--net/openvswitch/datapath.c18
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/sched/cls_api.c10
-rw-r--r--net/sched/sch_api.c24
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sched/sch_cbq.c1
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_etf.c3
-rw-r--r--net/sched/sch_ets.c2
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_fq_pie.c3
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_multiq.c1
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c2
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfb.c2
-rw-r--r--net/sched/sch_skbprio.c3
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sched/sch_tbf.c2
-rw-r--r--net/sched/sch_teql.c3
-rw-r--r--net/unix/af_unix.c36
-rw-r--r--net/vmw_vsock/af_vsock.c33
-rw-r--r--net/vmw_vsock/hyperv_transport.c7
-rw-r--r--net/vmw_vsock/virtio_transport_common.c7
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c10
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c12
-rw-r--r--tools/bpf/bpftool/common.c15
-rw-r--r--tools/bpf/bpftool/feature.c2
-rw-r--r--tools/bpf/bpftool/main.c10
-rw-r--r--tools/include/uapi/linux/bpf.h33
-rw-r--r--tools/lib/bpf/bpf.c186
-rw-r--r--tools/lib/bpf/bpf_tracing.h14
-rw-r--r--tools/lib/bpf/btf.c2
-rw-r--r--tools/lib/bpf/btf.h1
-rw-r--r--tools/lib/bpf/libbpf.c104
-rw-r--r--tools/lib/bpf/libbpf.h2
-rw-r--r--tools/lib/bpf/libbpf.map2
-rw-r--r--tools/lib/bpf/libbpf_internal.h3
-rw-r--r--tools/lib/bpf/libbpf_legacy.h2
-rw-r--r--tools/lib/bpf/libbpf_probes.c2
-rw-r--r--tools/lib/bpf/netlink.c3
-rw-r--r--tools/lib/bpf/skel_internal.h10
-rw-r--r--tools/lib/bpf/usdt.bpf.h4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x2
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoattach.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/time_tai.c74
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c94
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_destructive.c14
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoattach.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_helper_restricted.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_time_tai.c24
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh34
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c166
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh273
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh264
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh264
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh311
-rw-r--r--tools/testing/selftests/net/.gitignore5
-rw-r--r--tools/testing/selftests/net/Makefile6
-rw-r--r--tools/testing/selftests/net/bind_bhash.c144
-rwxr-xr-xtools/testing/selftests/net/bind_bhash.sh66
-rwxr-xr-xtools/testing/selftests/net/l2_tos_ttl_inherit.sh390
-rw-r--r--tools/testing/selftests/net/sk_bind_sendto_listen.c80
-rw-r--r--tools/testing/selftests/net/sk_connect_zero_addr.c62
-rw-r--r--tools/testing/vsock/vsock_test.c108
358 files changed, 9725 insertions, 16231 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d7f30902fda0..adfda56b2691 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -966,10 +966,6 @@
debugpat [X86] Enable PAT debugging
- decnet.addr= [HW,NET]
- Format: <area>[,<node>]
- See also Documentation/networking/decnet.rst.
-
default_hugepagesz=
[HW] The size of the default HugeTLB page. This is
the size represented by the legacy /proc/ hugepages
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 60d44165fba7..555681ef6195 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -31,17 +31,18 @@ see only some of them, depending on your kernel's configuration.
Table : Subdirectories in /proc/sys/net
- ========= =================== = ========== ==================
+ ========= =================== = ========== ===================
  Directory Content               Directory  Content
- ========= =================== = ========== ==================
- core      General parameter     appletalk  Appletalk protocol
- unix      Unix domain sockets   netrom     NET/ROM
- 802       E802 protocol         ax25       AX25
- ethernet  Ethernet protocol     rose       X.25 PLP layer
+ ========= =================== = ========== ===================
+ 802       E802 protocol         mptcp      Multipath TCP
+ appletalk Appletalk protocol    netfilter  Network Filter
+ ax25      AX25                  netrom     NET/ROM
+ bridge    Bridging              rose       X.25 PLP layer
+ core      General parameter     tipc       TIPC
+ ethernet  Ethernet protocol     unix       Unix domain sockets
  ipv4      IP version 4          x25        X.25 protocol
- bridge    Bridging              decnet     DEC net
- ipv6      IP version 6          tipc       TIPC
- ========= =================== = ========== ==================
+ ipv6      IP version 6
+ ========= =================== = ========== ===================
1. /proc/sys/net/core - Network core options
============================================
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index c0b7dae6dbf5..781731749e55 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -146,6 +146,21 @@ that operate (change some property, perform some operation) on an object that
was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to
ensure the integrity of the operation being performed on the expected object.
+2.4.6 KF_SLEEPABLE flag
+-----------------------
+
+The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only
+be called by sleepable BPF programs (BPF_F_SLEEPABLE).
+
+2.4.7 KF_DESTRUCTIVE flag
+--------------------------
+
+The KF_DESTRUCTIVE flag is used to indicate kfuncs whose invocation is
+destructive to the system. For example, such a call can result in the system
+rebooting or panicking. Due to this, additional restrictions apply to these
+calls. At the moment they only require the CAP_SYS_BOOT capability, but more
+can be added later.
+
2.5 Registering the kfuncs
--------------------------
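As a minimal, hedged sketch of the two flags above (the kfunc name ``bpf_example_shutdown`` and the chosen program type are hypothetical; the registration pattern follows section 2.5 below):

.. code-block:: c

        /* Hypothetical destructive, sleepable kfunc; name and program type
         * used for registration are illustrative only.
         */
        BTF_SET8_START(example_kfunc_set)
        BTF_ID_FLAGS(func, bpf_example_shutdown, KF_SLEEPABLE | KF_DESTRUCTIVE)
        BTF_SET8_END(example_kfunc_set)

        static const struct btf_kfunc_id_set example_kfunc_id_set = {
                .owner = THIS_MODULE,
                .set   = &example_kfunc_set,
        };

        static int __init example_kfunc_init(void)
        {
                return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
                                                 &example_kfunc_id_set);
        }
        late_initcall(example_kfunc_init);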
diff --git a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
index 3f01b65f3b22..eb01a8f37ce4 100644
--- a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
@@ -63,6 +63,8 @@ examples:
reg = <3>;
label = "cpu";
ethernet = <&fec1>;
+ phy-mode = "rgmii-id";
+
fixed-link {
speed = <1000>;
full-duplex;
diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
index 23114d691d2a..2e01371b8288 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
@@ -254,6 +254,8 @@ examples:
ethernet = <&amac2>;
label = "cpu";
reg = <8>;
+ phy-mode = "internal";
+
fixed-link {
speed = <1000>;
full-duplex;
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml b/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml
index 09317e16cb5d..10ad7e71097b 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml
@@ -76,6 +76,23 @@ properties:
required:
- reg
+# CPU and DSA ports must have phylink-compatible link descriptions
+if:
+ oneOf:
+ - required: [ ethernet ]
+ - required: [ link ]
+then:
+ allOf:
+ - required:
+ - phy-mode
+ - oneOf:
+ - required:
+ - fixed-link
+ - required:
+ - phy-handle
+ - required:
+ - managed
+
additionalProperties: true
...
diff --git a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml
index 228683773151..1ff44dd68a61 100644
--- a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml
@@ -93,6 +93,12 @@ examples:
reg = <0>;
label = "cpu";
ethernet = <&gmac0>;
+ phy-mode = "mii";
+
+ fixed-link {
+ speed = <100>;
+ full-duplex;
+ };
};
port@2 {
diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
index 6bbd8145b6c1..456802affc9d 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -109,6 +109,8 @@ examples:
reg = <5>;
label = "cpu";
ethernet = <&eth0>;
+ phy-mode = "rgmii";
+
fixed-link {
speed = <1000>;
full-duplex;
@@ -146,6 +148,8 @@ examples:
reg = <6>;
label = "cpu";
ethernet = <&eth0>;
+ phy-mode = "rgmii";
+
fixed-link {
speed = <1000>;
full-duplex;
diff --git a/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml
index 4d428f5ad044..14a1f0b4c32b 100644
--- a/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml
@@ -131,6 +131,8 @@ examples:
reg = <4>;
ethernet = <&gmac2>;
label = "cpu";
+ phy-mode = "internal";
+
fixed-link {
speed = <1000>;
full-duplex;
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index c138a1022879..4b3c590fcebf 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -67,6 +67,7 @@ properties:
- gmii
- sgmii
- qsgmii
+ - qusgmii
- tbi
- rev-mii
- rmii
diff --git a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
new file mode 100644
index 000000000000..3a35ac1c260d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/fsl,fman-dtsec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP FMan MAC
+
+maintainers:
+ - Madalin Bucur <madalin.bucur@nxp.com>
+
+description: |
+ Each FMan has several MACs, each implementing an Ethernet interface. Earlier
+ versions of FMan used the Datapath Three Speed Ethernet Controller (dTSEC) for
+ 10/100/1000 MBit/s speeds, and the 10-Gigabit Ethernet Media Access Controller
+ (10GEC) for 10 Gbit/s speeds. Later versions of FMan use the Multirate
+ Ethernet Media Access Controller (mEMAC) to handle all speeds.
+
+properties:
+ compatible:
+ enum:
+ - fsl,fman-dtsec
+ - fsl,fman-xgec
+ - fsl,fman-memac
+
+ cell-index:
+ maximum: 64
+ description: |
+ FManV2:
+ register[bit] MAC cell-index
+ ============================================================
+ FM_EPI[16] XGEC 8
+ FM_EPI[16+n] dTSECn n-1
+ FM_NPI[11+n] dTSECn n-1
+ n = 1,..,5
+
+ FManV3:
+ register[bit] MAC cell-index
+ ============================================================
+ FM_EPI[16+n] mEMACn n-1
+ FM_EPI[25] mEMAC10 9
+
+ FM_NPI[11+n] mEMACn n-1
+ FM_NPI[10] mEMAC10 9
+ FM_NPI[11] mEMAC9 8
+ n = 1,..8
+
+ FM_EPI and FM_NPI are located in the FMan memory map.
+
+ 2. SoC registers:
+
+ - P2041, P3041, P4080 P5020, P5040:
+ register[bit] FMan MAC cell
+ Unit index
+ ============================================================
+ DCFG_DEVDISR2[7] 1 XGEC 8
+ DCFG_DEVDISR2[7+n] 1 dTSECn n-1
+ DCFG_DEVDISR2[15] 2 XGEC 8
+ DCFG_DEVDISR2[15+n] 2 dTSECn n-1
+ n = 1,..5
+
+ - T1040, T2080, T4240, B4860:
+ register[bit] FMan MAC cell
+ Unit index
+ ============================================================
+ DCFG_CCSR_DEVDISR2[n-1] 1 mEMACn n-1
+ DCFG_CCSR_DEVDISR2[11+n] 2 mEMACn n-1
+ n = 1,..6,9,10
+
+ EVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in
+ the specific SoC "Device Configuration/Pin Control" Memory
+ Map.
+
+ reg:
+ maxItems: 1
+
+ fsl,fman-ports:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 2
+ description: |
+ An array of two references: the first is the FMan RX port and the second
+ is the TX port used by this MAC.
+
+ ptp-timer:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: A reference to the IEEE1588 timer
+
+ pcsphy-handle:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: A reference to the PCS (typically found on the SerDes)
+
+ tbi-handle:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: A reference to the (TBI-based) PCS
+
+required:
+ - compatible
+ - cell-index
+ - reg
+ - fsl,fman-ports
+ - ptp-timer
+
+allOf:
+ - $ref: ethernet-controller.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,fman-dtsec
+ then:
+ required:
+ - tbi-handle
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: fsl,fman-memac
+ then:
+ required:
+ - pcsphy-handle
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ ethernet@e0000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <0>;
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx8 &fman1_tx28>;
+ ptp-timer = <&ptp_timer>;
+ tbi-handle = <&tbi0>;
+ };
+ - |
+ ethernet@e8000 {
+ cell-index = <4>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe8000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0c &fman0_tx_0x2c>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy4>;
+ phy-handle = <&sgmii_phy1>;
+ phy-connection-type = "sgmii";
+ };
+...
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 801efc7d6818..b9055335db3b 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -232,133 +232,7 @@ port@81000 {
=============================================================================
FMan dTSEC/XGEC/mEMAC Node
-DESCRIPTION
-
-mEMAC/dTSEC/XGEC are the Ethernet network interfaces
-
-PROPERTIES
-
-- compatible
- Usage: required
- Value type: <stringlist>
- Definition: A standard property.
- Must include one of the following:
- - "fsl,fman-dtsec" for dTSEC MAC
- - "fsl,fman-xgec" for XGEC MAC
- - "fsl,fman-memac" for mEMAC MAC
-
-- cell-index
- Usage: required
- Value type: <u32>
- Definition: Specifies the MAC id.
-
- The cell-index value may be used by the FMan or the SoC, to
- identify the MAC unit in the FMan (or SoC) memory map.
- In the tables below there's a description of the cell-index
- use, there are two tables, one describes the use of cell-index
- by the FMan, the second describes the use by the SoC:
-
- 1. FMan Registers
-
- FManV2:
- register[bit] MAC cell-index
- ============================================================
- FM_EPI[16] XGEC 8
- FM_EPI[16+n] dTSECn n-1
- FM_NPI[11+n] dTSECn n-1
- n = 1,..,5
-
- FManV3:
- register[bit] MAC cell-index
- ============================================================
- FM_EPI[16+n] mEMACn n-1
- FM_EPI[25] mEMAC10 9
-
- FM_NPI[11+n] mEMACn n-1
- FM_NPI[10] mEMAC10 9
- FM_NPI[11] mEMAC9 8
- n = 1,..8
-
- FM_EPI and FM_NPI are located in the FMan memory map.
-
- 2. SoC registers:
-
- - P2041, P3041, P4080 P5020, P5040:
- register[bit] FMan MAC cell
- Unit index
- ============================================================
- DCFG_DEVDISR2[7] 1 XGEC 8
- DCFG_DEVDISR2[7+n] 1 dTSECn n-1
- DCFG_DEVDISR2[15] 2 XGEC 8
- DCFG_DEVDISR2[15+n] 2 dTSECn n-1
- n = 1,..5
-
- - T1040, T2080, T4240, B4860:
- register[bit] FMan MAC cell
- Unit index
- ============================================================
- DCFG_CCSR_DEVDISR2[n-1] 1 mEMACn n-1
- DCFG_CCSR_DEVDISR2[11+n] 2 mEMACn n-1
- n = 1,..6,9,10
-
- EVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in
- the specific SoC "Device Configuration/Pin Control" Memory
- Map.
-
-- reg
- Usage: required
- Value type: <prop-encoded-array>
- Definition: A standard property.
-
-- fsl,fman-ports
- Usage: required
- Value type: <prop-encoded-array>
- Definition: An array of two phandles - the first references is
- the FMan RX port and the second is the TX port used by this
- MAC.
-
-- ptp-timer
- Usage required
- Value type: <phandle>
- Definition: A phandle for 1EEE1588 timer.
-
-- pcsphy-handle
- Usage required for "fsl,fman-memac" MACs
- Value type: <phandle>
- Definition: A phandle for pcsphy.
-
-- tbi-handle
- Usage required for "fsl,fman-dtsec" MACs
- Value type: <phandle>
- Definition: A phandle for tbiphy.
-
-EXAMPLE
-
-fman1_tx28: port@a8000 {
- cell-index = <0x28>;
- compatible = "fsl,fman-v2-port-tx";
- reg = <0xa8000 0x1000>;
-};
-
-fman1_rx8: port@88000 {
- cell-index = <0x8>;
- compatible = "fsl,fman-v2-port-rx";
- reg = <0x88000 0x1000>;
-};
-
-ptp-timer: ptp_timer@fe000 {
- compatible = "fsl,fman-ptp-timer";
- reg = <0xfe000 0x1000>;
-};
-
-ethernet@e0000 {
- compatible = "fsl,fman-dtsec";
- cell-index = <0>;
- reg = <0xe0000 0x1000>;
- fsl,fman-ports = <&fman1_rx8 &fman1_tx28>;
- ptp-timer = <&ptp-timer>;
- tbi-handle = <&tbi0>;
-};
+Refer to Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml
============================================================================
FMan IEEE 1588 Node
diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
index d51da24f3505..ab8867e6939b 100644
--- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@@ -31,6 +31,22 @@ patternProperties:
description:
The ID number for the child PHY. Should be +1 of parent PHY.
+ nxp,rmii-refclk-in:
+ type: boolean
+ description: |
+ The REF_CLK is provided for both transmitted and received data
+ in RMII mode. This clock signal is provided by the PHY and is
+ typically derived from an external 25MHz crystal. Alternatively,
+ a 50MHz clock signal generated by an external oscillator can be
+ connected to pin REF_CLK. A third option is to connect a 25MHz
+ clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
+ as input or output according to the actual circuit connection.
+ If present, indicates that the REF_CLK will be configured as
+ interface reference clock input when RMII mode is enabled.
+ If not present, the REF_CLK will be configured as interface
+ reference clock output when RMII mode is enabled.
+ Only supported on TJA1100 and TJA1101.
+
required:
- reg
@@ -44,6 +60,7 @@ examples:
tja1101_phy0: ethernet-phy@4 {
reg = <0x4>;
+ nxp,rmii-refclk-in;
};
};
- |
diff --git a/Documentation/devicetree/bindings/net/qca,ar803x.yaml b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
index b3d4013b7ca6..161d28919316 100644
--- a/Documentation/devicetree/bindings/net/qca,ar803x.yaml
+++ b/Documentation/devicetree/bindings/net/qca,ar803x.yaml
@@ -40,6 +40,14 @@ properties:
Only supported on the AR8031.
type: boolean
+ qca,disable-hibernation-mode:
+ description: |
+ Disable the Atheros AR803X PHY's hibernation mode. If present,
+ indicates that the PHY will not enter power saving mode when the
+ cable is disconnected, and the RX_CLK will keep outputting a
+ valid clock.
+ type: boolean
+
qca,smarteee-tw-us-100m:
description: EEE Tw parameter for 100M links.
$ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index b8281d8be940..9ef11913052c 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -55,6 +55,7 @@ properties:
compatible:
enum:
- ti,am654-cpsw-nuss
+ - ti,j7200-cpswxg-nuss
- ti,j721e-cpsw-nuss
- ti,am642-cpsw-nuss
@@ -110,7 +111,7 @@ properties:
const: 0
patternProperties:
- port@[1-2]:
+ "^port@[1-4]$":
type: object
description: CPSWxG NUSS external ports
@@ -119,7 +120,7 @@ properties:
properties:
reg:
minimum: 1
- maximum: 2
+ maximum: 4
description: CPSW port number
phys:
@@ -178,6 +179,19 @@ required:
- '#address-cells'
- '#size-cells'
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: ti,j7200-cpswxg-nuss
+ then:
+ properties:
+ ethernet-ports:
+ patternProperties:
+ "^port@[3-4]$": false
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml b/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml
index 8156a9aeb589..304757bf9281 100644
--- a/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml
+++ b/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml
@@ -7,7 +7,7 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: The Vertexcom MSE102x (SPI) Device Tree Bindings
maintainers:
- - Stefan Wahren <stefan.wahren@in-tech.com>
+ - Stefan Wahren <stefan.wahren@chargebyte.com>
description:
Vertexcom's MSE102x are a family of HomePlug GreenPHY chips.
diff --git a/Documentation/networking/decnet.rst b/Documentation/networking/decnet.rst
deleted file mode 100644
index b8bc11ff8370..000000000000
--- a/Documentation/networking/decnet.rst
+++ /dev/null
@@ -1,243 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=========================================
-Linux DECnet Networking Layer Information
-=========================================
-
-1. Other documentation....
-==========================
-
- - Project Home Pages
- - http://www.chygwyn.com/ - Kernel info
- - http://linux-decnet.sourceforge.net/ - Userland tools
- - http://www.sourceforge.net/projects/linux-decnet/ - Status page
-
-2. Configuring the kernel
-=========================
-
-Be sure to turn on the following options:
-
- - CONFIG_DECNET (obviously)
- - CONFIG_PROC_FS (to see what's going on)
- - CONFIG_SYSCTL (for easy configuration)
-
-if you want to try out router support (not properly debugged yet)
-you'll need the following options as well...
-
- - CONFIG_DECNET_ROUTER (to be able to add/delete routes)
- - CONFIG_NETFILTER (will be required for the DECnet routing daemon)
-
-Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
-that you need it, in general you won't and it can cause ifconfig to
-malfunction.
-
-Run time configuration has changed slightly from the 2.4 system. If you
-want to configure an endnode, then the simplified procedure is as follows:
-
- - Set the MAC address on your ethernet card before starting _any_ other
- network protocols.
-
-As soon as your network card is brought into the UP state, DECnet should
-start working. If you need something more complicated or are unsure how
-to set the MAC address, see the next section. Also all configurations which
-worked with 2.4 will work under 2.5 with no change.
-
-3. Command line options
-=======================
-
-You can set a DECnet address on the kernel command line for compatibility
-with the 2.4 configuration procedure, but in general it's not needed any more.
-If you do st a DECnet address on the command line, it has only one purpose
-which is that its added to the addresses on the loopback device.
-
-With 2.4 kernels, DECnet would only recognise addresses as local if they
-were added to the loopback device. In 2.5, any local interface address
-can be used to loop back to the local machine. Of course this does not
-prevent you adding further addresses to the loopback device if you
-want to.
-
-N.B. Since the address list of an interface determines the addresses for
-which "hello" messages are sent, if you don't set an address on the loopback
-interface then you won't see any entries in /proc/net/neigh for the local
-host until such time as you start a connection. This doesn't affect the
-operation of the local communications in any other way though.
-
-The kernel command line takes options looking like the following::
-
- decnet.addr=1,2
-
-the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels
-and early 2.3.xx kernels, you must use a comma when specifying the
-DECnet address like this. For more recent 2.3.xx kernels, you may
-use almost any character except space, although a `.` would be the most
-obvious choice :-)
-
-There used to be a third number specifying the node type. This option
-has gone away in favour of a per interface node type. This is now set
-using /proc/sys/net/decnet/conf/<dev>/forwarding. This file can be
-set with a single digit, 0=EndNode, 1=L1 Router and 2=L2 Router.
-
-There are also equivalent options for modules. The node address can
-also be set through the /proc/sys/net/decnet/ files, as can other system
-parameters.
-
-Currently the only supported devices are ethernet and ip_gre. The
-ethernet address of your ethernet card has to be set according to the DECnet
-address of the node in order for it to be autoconfigured (and then appear in
-/proc/net/decnet_dev). There is a utility available at the above
-FTP sites called dn2ethaddr which can compute the correct ethernet
-address to use. The address can be set by ifconfig either before or
-at the time the device is brought up. If you are using RedHat you can
-add the line::
-
- MACADDR=AA:00:04:00:03:04
-
-or something similar, to /etc/sysconfig/network-scripts/ifcfg-eth0 or
-wherever your network card's configuration lives. Setting the MAC address
-of your ethernet card to an address starting with "hi-ord" will cause a
-DECnet address which matches to be added to the interface (which you can
-verify with iproute2).
-
-The default device for routing can be set through the /proc filesystem
-by setting /proc/sys/net/decnet/default_device to the
-device you want DECnet to route packets out of when no specific route
-is available. Usually this will be eth0, for example::
-
- echo -n "eth0" >/proc/sys/net/decnet/default_device
-
-If you don't set the default device, then it will default to the first
-ethernet card which has been autoconfigured as described above. You can
-confirm that by looking in the default_device file of course.
-
-There is a list of what the other files under /proc/sys/net/decnet/ do
-on the kernel patch web site (shown above).
-
-4. Run time kernel configuration
-================================
-
-
-This is either done through the sysctl/proc interface (see the kernel web
-pages for details on what the various options do) or through the iproute2
-package in the same way as IPv4/6 configuration is performed.
-
-Documentation for iproute2 is included with the package, although there is
-as yet no specific section on DECnet, most of the features apply to both
-IP and DECnet, albeit with DECnet addresses instead of IP addresses and
-a reduced functionality.
-
-If you want to configure a DECnet router you'll need the iproute2 package
-since its the _only_ way to add and delete routes currently. Eventually
-there will be a routing daemon to send and receive routing messages for
-each interface and update the kernel routing tables accordingly. The
-routing daemon will use netfilter to listen to routing packets, and
-rtnetlink to update the kernels routing tables.
-
-The DECnet raw socket layer has been removed since it was there purely
-for use by the routing daemon which will now use netfilter (a much cleaner
-and more generic solution) instead.
-
-5. How can I tell if its working?
-=================================
-
-Here is a quick guide of what to look for in order to know if your DECnet
-kernel subsystem is working.
-
- - Is the node address set (see /proc/sys/net/decnet/node_address)
- - Is the node of the correct type
- (see /proc/sys/net/decnet/conf/<dev>/forwarding)
- - Is the Ethernet MAC address of each Ethernet card set to match
- the DECnet address. If in doubt use the dn2ethaddr utility available
- at the ftp archive.
- - If the previous two steps are satisfied, and the Ethernet card is up,
- you should find that it is listed in /proc/net/decnet_dev and also
- that it appears as a directory in /proc/sys/net/decnet/conf/. The
- loopback device (lo) should also appear and is required to communicate
- within a node.
- - If you have any DECnet routers on your network, they should appear
- in /proc/net/decnet_neigh, otherwise this file will only contain the
- entry for the node itself (if it doesn't check to see if lo is up).
- - If you want to send to any node which is not listed in the
- /proc/net/decnet_neigh file, you'll need to set the default device
- to point to an Ethernet card with connection to a router. This is
- again done with the /proc/sys/net/decnet/default_device file.
- - Try starting a simple server and client, like the dnping/dnmirror
- over the loopback interface. With luck they should communicate.
- For this step and those after, you'll need the DECnet library
- which can be obtained from the above ftp sites as well as the
- actual utilities themselves.
- - If this seems to work, then try talking to a node on your local
- network, and see if you can obtain the same results.
- - At this point you are on your own... :-)
-
-6. How to send a bug report
-===========================
-
-If you've found a bug and want to report it, then there are several things
-you can do to help me work out exactly what it is that is wrong. Useful
-information (_most_ of which _is_ _essential_) includes:
-
- - What kernel version are you running ?
- - What version of the patch are you running ?
- - How far though the above set of tests can you get ?
- - What is in the /proc/decnet* files and /proc/sys/net/decnet/* files ?
- - Which services are you running ?
- - Which client caused the problem ?
- - How much data was being transferred ?
- - Was the network congested ?
- - How can the problem be reproduced ?
- - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
- tcpdump don't understand how to dump DECnet properly, so including
- the hex listing of the packet contents is _essential_, usually the -x flag.
- You may also need to increase the length grabbed with the -s flag. The
- -e flag also provides very useful information (ethernet MAC addresses))
-
-7. MAC FAQ
-==========
-
-A quick FAQ on ethernet MAC addresses to explain how Linux and DECnet
-interact and how to get the best performance from your hardware.
-
-Ethernet cards are designed to normally only pass received network frames
-to a host computer when they are addressed to it, or to the broadcast address.
-
-Linux has an interface which allows the setting of extra addresses for
-an ethernet card to listen to. If the ethernet card supports it, the
-filtering operation will be done in hardware, if not the extra unwanted packets
-received will be discarded by the host computer. In the latter case,
-significant processor time and bus bandwidth can be used up on a busy
-network (see the NAPI documentation for a longer explanation of these
-effects).
-
-DECnet makes use of this interface to allow running DECnet on an ethernet
-card which has already been configured using TCP/IP (presumably using the
-built in MAC address of the card, as usual) and/or to allow multiple DECnet
-addresses on each physical interface. If you do this, be aware that if your
-ethernet card doesn't support perfect hashing in its MAC address filter
-then your computer will be doing more work than required. Some cards
-will simply set themselves into promiscuous mode in order to receive
-packets from the DECnet specified addresses. So if you have one of these
-cards its better to set the MAC address of the card as described above
-to gain the best efficiency. Better still is to use a card which supports
-NAPI as well.
-
-
-8. Mailing list
-===============
-
-If you are keen to get involved in development, or want to ask questions
-about configuration, or even just report bugs, then there is a mailing
-list that you can join, details are at:
-
-http://sourceforge.net/mail/?group_id=4993
-
-9. Legal Info
-=============
-
-The Linux DECnet project team have placed their code under the GPL. The
-software is provided "as is" and without warranty express or implied.
-DECnet is a trademark of Compaq. This software is not a product of
-Compaq. We acknowledge the help of people at Compaq in providing extra
-documentation above and beyond what was previously publicly available.
-
-Steve Whitehouse <SteveW@ACM.org>
-
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index e3a5f985673e..4b653d040627 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -13,10 +13,8 @@ new APIs prefixed by ``devl_*``. The older APIs handle all the locking
in devlink core, but don't allow registration of most sub-objects once
the main devlink object is itself registered. The newer ``devl_*`` APIs assume
the devlink instance lock is already held. Drivers can take the instance
-lock by calling ``devl_lock()``. It is also held in most of the callbacks.
-Eventually all callbacks will be invoked under the devlink instance lock,
-refer to the use of the ``DEVLINK_NL_FLAG_NO_LOCK`` flag in devlink core
-to find out which callbacks are not converted, yet.
+lock by calling ``devl_lock()``. It is also held in all callbacks of devlink
+netlink commands.
Drivers are encouraged to use the devlink instance lock for their own needs.
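A minimal sketch of the locking pattern described above (``devl_port_register()`` is used purely as an example of a ``devl_*`` API; the surrounding function is hypothetical):

.. code-block:: c

        /* Take the instance lock around devl_* calls that expect it held. */
        static int example_register_port(struct devlink *devlink,
                                         struct devlink_port *dl_port,
                                         unsigned int port_index)
        {
                int err;

                devl_lock(devlink);
                err = devl_port_register(devlink, dl_port, port_index);
                devl_unlock(devlink);

                return err;
        }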
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 03b215bddde8..bacadd09e570 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -47,7 +47,6 @@ Contents:
cdc_mbim
dccp
dctcp
- decnet
dns_resolver
driver
eql
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 704f31da5167..712e44caebd0 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -308,6 +308,15 @@ Some of the interface modes are described below:
rate of 125Mpbs using a 4B/5B encoding scheme, resulting in an underlying
data rate of 100Mpbs.
+``PHY_INTERFACE_MODE_QUSGMII``
+ This defines Cisco's Quad USGMII mode, which is the quad variant of
+ the USGMII (Universal SGMII) link. It is very similar to QSGMII, but uses
+ a Packet Control Header (PCH) instead of the 7-byte preamble to carry not
+ only the port ID, but also so-called "extensions". The only documented
+ extension so far in the specification is the inclusion of timestamps, for
+ PTP-enabled PHYs. This mode isn't compatible with QSGMII, but offers the
+ same capabilities in terms of link speed and negotiation.
+
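For drivers, ``PHY_INTERFACE_MODE_QUSGMII`` is just another ``phy_interface_t`` value; a hedged sketch follows (the callback and the assumption that QUSGMII can share a path with QSGMII are illustrative, not taken from any in-tree driver):

.. code-block:: c

        /* Sketch only: dispatch on the new interface mode. Whether QUSGMII
         * can reuse the QSGMII code path is hardware specific.
         */
        static int example_mac_check_interface(phy_interface_t interface)
        {
                switch (interface) {
                case PHY_INTERFACE_MODE_QSGMII:
                        /* port carried in the 7-byte preamble */
                        return 0;
                case PHY_INTERFACE_MODE_QUSGMII:
                        /* port and extensions carried in the PCH */
                        return 0;
                default:
                        return -EOPNOTSUPP;
                }
        }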
Pause frames / flow control
===========================
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index a61eac0c73f8..c78da9ce0ec4 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -26,6 +26,7 @@ place where this information is gathered.
ioctl/index
iommu
media/index
+ netlink/index
sysfs-platform_profile
vduse
futex2
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 3b985b19f39d..5f81e2a24a5c 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -308,7 +308,6 @@ Code Seq# Include File Comments
0x89 00-06 arch/x86/include/asm/sockios.h
0x89 0B-DF linux/sockios.h
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
-0x89 E0-EF linux/dn.h PROTOPRIVATE range
0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
0x8B all linux/wireless.h
0x8C 00-3F WiNRADiO driver
diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst
new file mode 100644
index 000000000000..b0c21538d97d
--- /dev/null
+++ b/Documentation/userspace-api/netlink/index.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+================
+Netlink Handbook
+================
+
+Netlink documentation for users.
+
+.. toctree::
+ :maxdepth: 2
+
+ intro
diff --git a/Documentation/userspace-api/netlink/intro.rst b/Documentation/userspace-api/netlink/intro.rst
new file mode 100644
index 000000000000..94337f79e077
--- /dev/null
+++ b/Documentation/userspace-api/netlink/intro.rst
@@ -0,0 +1,643 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+=======================
+Introduction to Netlink
+=======================
+
+Netlink is often described as an ioctl() replacement.
+It aims to replace fixed-format C structures as supplied
+to ioctl() with a format which allows an easy way to add
+or extend the arguments.
+
+To achieve this Netlink uses a minimal fixed-format metadata header
+followed by multiple attributes in the TLV (type, length, value) format.
+
+Unfortunately the protocol has evolved over the years, in an organic
+and undocumented fashion, making it hard to coherently explain.
+To make the most practical sense this document starts by describing
+netlink as it is used today and dives into more "historical" uses
+in later sections.
+
+Opening a socket
+================
+
+Netlink communication happens over sockets, a socket needs to be
+opened first:
+
+.. code-block:: c
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+The use of sockets allows for a natural way of exchanging information
+in both directions (to and from the kernel). The operations are still
+performed synchronously when applications send() the request but
+a separate recv() system call is needed to read the reply.
+
+A very simplified flow of a Netlink "call" will therefore look
+something like:
+
+.. code-block:: c
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+ /* format the request */
+ send(fd, &request, sizeof(request));
+ n = recv(fd, &response, RSP_BUFFER_SIZE);
+ /* interpret the response */
+
+Netlink also provides natural support for "dumping", i.e. communicating
+to user space all objects of a certain type (e.g. dumping all network
+interfaces).
+
+.. code-block:: c
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+
+ /* format the dump request */
+ send(fd, &request, sizeof(request));
+ while (1) {
+ n = recv(fd, &buffer, RSP_BUFFER_SIZE);
+ /* one recv() call can read multiple messages, hence the loop below */
+ for (nl_msg in buffer) {
+ if (nl_msg.nlmsg_type == NLMSG_DONE)
+ goto dump_finished;
+ /* process the object */
+ }
+ }
+ dump_finished:
+
+The first two arguments of the socket() call require little explanation -
+it is opening a Netlink socket, with all headers provided by the user
+(hence NETLINK, RAW). The last argument is the protocol within Netlink.
+This field identifies the subsystem with which the socket will
+communicate.
+
+Classic vs Generic Netlink
+--------------------------
+
+Initial implementation of Netlink depended on a static allocation
+of IDs to subsystems and provided little supporting infrastructure.
+Let us refer to those protocols collectively as **Classic Netlink**.
+The list of them is defined on top of the ``include/uapi/linux/netlink.h``
+file, they include among others - general networking (NETLINK_ROUTE),
+iSCSI (NETLINK_ISCSI), and audit (NETLINK_AUDIT).
+
+**Generic Netlink** (introduced in 2005) allows for dynamic registration of
+subsystems (and subsystem ID allocation), introspection and simplifies
+implementing the kernel side of the interface.
+
+The following section describes how to use Generic Netlink, as the
+number of subsystems using Generic Netlink outnumbers the older
+protocols by an order of magnitude. There are also no plans for adding
+more Classic Netlink protocols to the kernel.
+Basic information on how communicating with core networking parts of
+the Linux kernel (or another of the 20 subsystems using Classic
+Netlink) differs from Generic Netlink is provided later in this document.
+
+Generic Netlink
+===============
+
+In addition to the Netlink fixed metadata header, each Netlink protocol
+defines its own fixed metadata header. (Similarly to how network
+headers stack - Ethernet > IP > TCP - we have Netlink > Generic N. > Family.)
+
+A Netlink message always starts with struct nlmsghdr, which is followed
+by a protocol-specific header. In case of Generic Netlink the protocol
+header is struct genlmsghdr.
+
+The practical meaning of the fields in case of Generic Netlink is as follows:
+
+.. code-block:: c
+
+ struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including headers */
+ __u16 nlmsg_type; /* Generic Netlink Family (subsystem) ID */
+ __u16 nlmsg_flags; /* Flags - request or dump */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Port ID, set to 0 */
+ };
+ struct genlmsghdr {
+ __u8 cmd; /* Command, as defined by the Family */
+ __u8 version; /* Irrelevant, set to 1 */
+ __u16 reserved; /* Reserved, set to 0 */
+ };
+ /* TLV attributes follow... */
+
+In Classic Netlink :c:member:`nlmsghdr.nlmsg_type` used to identify
+which operation within the subsystem the message was referring to
+(e.g. get information about a netdev). Generic Netlink needs to mux
+multiple subsystems in a single protocol so it uses this field to
+identify the subsystem, and :c:member:`genlmsghdr.cmd` identifies
+the operation instead. (See :ref:`res_fam` for
+information on how to find the Family ID of the subsystem of interest.)
+Note that the first 16 values (0 - 15) of this field are reserved for
+control messages both in Classic Netlink and Generic Netlink.
+See :ref:`nl_msg_type` for more details.
+
+There are 3 usual types of message exchanges on a Netlink socket:
+
+ - performing a single action (``do``);
+ - dumping information (``dump``);
+ - getting asynchronous notifications (``multicast``).
+
+Classic Netlink is very flexible and presumably allows other types
+of exchanges to happen, but in practice those are the three that get
+used.
+
+Asynchronous notifications are sent by the kernel and received by
+the user sockets which subscribed to them. ``do`` and ``dump`` requests
+are initiated by the user. :c:member:`nlmsghdr.nlmsg_flags` should
+be set as follows:
+
+ - for ``do``: ``NLM_F_REQUEST | NLM_F_ACK``
+ - for ``dump``: ``NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP``
+
+:c:member:`nlmsghdr.nlmsg_seq` should be set to a monotonically
+increasing value. The value gets echoed back in responses and doesn't
+matter in practice, but setting it to an increasing value for each
+message sent is considered good hygiene. The purpose of the field is
+matching responses to requests. Asynchronous notifications will have
+:c:member:`nlmsghdr.nlmsg_seq` of ``0``.
+
+:c:member:`nlmsghdr.nlmsg_pid` is the Netlink equivalent of an address.
+This field can be set to ``0`` when talking to the kernel.
+See :ref:`nlmsg_pid` for the (uncommon) uses of the field.
+
+The expected use for :c:member:`genlmsghdr.version` was to allow
+versioning of the APIs provided by the subsystems. No subsystem to
+date made significant use of this field, so setting it to ``1`` seems
+like a safe bet.
+
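Putting the header fields above together, a minimal sketch of a ``do`` request might look as follows (the family ID and command are placeholders resolved at run time, and no attributes are appended):

.. code-block:: c

        #include <linux/netlink.h>
        #include <linux/genetlink.h>
        #include <string.h>
        #include <sys/socket.h>

        /* Fill the two fixed headers and send an attribute-less request. */
        static ssize_t send_genl_do(int fd, __u16 family_id, __u8 cmd)
        {
                struct {
                        struct nlmsghdr nlh;
                        struct genlmsghdr genl;
                        /* TLV attributes would follow here */
                } req;

                memset(&req, 0, sizeof(req));
                req.nlh.nlmsg_len = sizeof(req);
                req.nlh.nlmsg_type = family_id;   /* subsystem (family) ID */
                req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
                req.nlh.nlmsg_seq = 1;            /* monotonically increasing */
                req.nlh.nlmsg_pid = 0;            /* talking to the kernel */
                req.genl.cmd = cmd;               /* operation within the family */
                req.genl.version = 1;

                return send(fd, &req, sizeof(req), 0);
        }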
+.. _nl_msg_type:
+
+Netlink message types
+---------------------
+
+As previously mentioned :c:member:`nlmsghdr.nlmsg_type` carries
+protocol specific values but the first 16 identifiers are reserved
+(first subsystem specific message type should be equal to
+``NLMSG_MIN_TYPE`` which is ``0x10``).
+
+There are only 4 Netlink control messages defined:
+
+ - ``NLMSG_NOOP`` - ignore the message, not used in practice;
+ - ``NLMSG_ERROR`` - carries the return code of an operation;
+ - ``NLMSG_DONE`` - marks the end of a dump;
+ - ``NLMSG_OVERRUN`` - socket buffer has overflowed, not used to date.
+
+``NLMSG_ERROR`` and ``NLMSG_DONE`` are of practical importance.
+They carry return codes for operations. Note that unless
+the ``NLM_F_ACK`` flag is set on the request, Netlink will not respond
+with ``NLMSG_ERROR`` if there is no error. To avoid having to special-case
+this quirk, it is recommended to always set ``NLM_F_ACK``.
+
+The format of ``NLMSG_ERROR`` is described by struct nlmsgerr::
+
+ ----------------------------------------------
+ | struct nlmsghdr - response header |
+ ----------------------------------------------
+ | int error |
+ ----------------------------------------------
+ | struct nlmsghdr - original request header |
+ ----------------------------------------------
+ | ** optionally (1) payload of the request |
+ ----------------------------------------------
+ | ** optionally (2) extended ACK |
+ ----------------------------------------------
+
+There are two instances of struct nlmsghdr here, the first belonging to
+the response and the second to the request. ``NLMSG_ERROR`` carries
+information about the request which led to the error. This could be
+useful when trying
+to match requests to responses or re-parse the request to dump it into
+logs.
+
+The payload of the request is not echoed in messages reporting success
+(``error == 0``) or if the ``NETLINK_CAP_ACK`` setsockopt() was set.
+The latter is common, and perhaps recommended, as having to read a copy
+of every request back from the kernel is rather wasteful. The absence of
+the request payload is indicated by ``NLM_F_CAPPED`` in
+:c:member:`nlmsghdr.nlmsg_flags`.
+
+The second optional element of ``NLMSG_ERROR`` is the set of extended ACK
+attributes. See :ref:`ext_ack` for more details. The presence
+of extended ACK is indicated by ``NLM_F_ACK_TLVS`` in
+:c:member:`nlmsghdr.nlmsg_flags`.
+
+``NLMSG_DONE`` is simpler: the request is never echoed, but the extended
+ACK attributes may be present::
+
+ ----------------------------------------------
+ | struct nlmsghdr - response header |
+ ----------------------------------------------
+ | int error |
+ ----------------------------------------------
+ | ** optionally extended ACK |
+ ----------------------------------------------
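+
+A minimal sketch of interpreting these two control messages is shown below.
+It assumes ``nlh`` points at a complete message received from the kernel;
+extended ACK attributes (if any) are ignored here.
+
+.. code-block:: c
+
+  #include <linux/netlink.h>
+  #include <string.h>
+  #include <errno.h>
+
+  /* Sketch: extract the return code from NLMSG_ERROR / NLMSG_DONE */
+  static int nl_ack_error(struct nlmsghdr *nlh)
+  {
+          int err;
+
+          if (nlh->nlmsg_type != NLMSG_ERROR && nlh->nlmsg_type != NLMSG_DONE)
+                  return -EPROTO;          /* not a control message */
+          if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+                  return -EPROTO;          /* malformed */
+
+          /* the first field of the payload is the error code */
+          memcpy(&err, NLMSG_DATA(nlh), sizeof(err));
+          return err;  /* 0 on success, negative errno otherwise */
+  }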
+
+.. _res_fam:
+
+Resolving the Family ID
+-----------------------
+
+This section explains how to find the Family ID of a subsystem.
+It also serves as an example of Generic Netlink communication.
+
+Generic Netlink is itself a subsystem exposed via the Generic Netlink API.
+To avoid a circular dependency Generic Netlink has a statically allocated
+Family ID (``GENL_ID_CTRL`` which is equal to ``NLMSG_MIN_TYPE``).
+The Generic Netlink family implements a command used to find out information
+about other families (``CTRL_CMD_GETFAMILY``).
+
+To get information about the Generic Netlink family named for example
+``"test1"`` we need to send a message on the previously opened Generic Netlink
+socket. The message should target the Generic Netlink Family (1), be a
+``do`` (2) call to ``CTRL_CMD_GETFAMILY`` (3). A ``dump`` version of this
+call would make the kernel respond with information about *all* the families
+it knows about. Last but not least the name of the family in question has
+to be specified (4) as an attribute with the appropriate type::
+
+ struct nlmsghdr:
+ __u32 nlmsg_len: 32
+ __u16 nlmsg_type: GENL_ID_CTRL // (1)
+ __u16 nlmsg_flags: NLM_F_REQUEST | NLM_F_ACK // (2)
+ __u32 nlmsg_seq: 1
+ __u32 nlmsg_pid: 0
+
+ struct genlmsghdr:
+ __u8 cmd: CTRL_CMD_GETFAMILY // (3)
+ __u8 version: 2 /* or 1, doesn't matter */
+ __u16 reserved: 0
+
+ struct nlattr: // (4)
+ __u16 nla_len: 10
+ __u16 nla_type: CTRL_ATTR_FAMILY_NAME
+ char data: test1\0
+
+ (padding:)
+ char data: \0\0
+
+The length fields in Netlink (:c:member:`nlmsghdr.nlmsg_len`
+and :c:member:`nlattr.nla_len`) always *include* the header.
+Attribute headers in netlink must be aligned to 4 bytes from the start
+of the message, hence the extra ``\0\0`` after ``CTRL_ATTR_FAMILY_NAME``.
+The attribute lengths *exclude* the padding.
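+
+For illustration, the request above could be assembled and sent along the
+following lines. This is a sketch which assumes ``fd`` is the previously
+opened Generic Netlink socket; the numbered comments correspond to the
+points (1) - (4) above. Real applications typically use a library which
+hides this boilerplate.
+
+.. code-block:: c
+
+  #include <linux/netlink.h>
+  #include <linux/genetlink.h>
+  #include <string.h>
+  #include <sys/socket.h>
+
+  /* Sketch: ask the kernel for information about family "name" */
+  static int send_getfamily(int fd, const char *name)
+  {
+          struct {
+                  struct nlmsghdr nlh;
+                  struct genlmsghdr genl;
+                  char attrs[64];             /* room for the name attribute */
+          } req;
+          struct nlattr *attr = (struct nlattr *)req.attrs;
+          size_t name_len = strlen(name) + 1; /* include the '\0' */
+
+          if (NLA_HDRLEN + name_len > sizeof(req.attrs))
+                  return -1;
+
+          memset(&req, 0, sizeof(req));
+          req.nlh.nlmsg_type = GENL_ID_CTRL;                /* (1) */
+          req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;  /* (2) */
+          req.nlh.nlmsg_seq = 1;
+          req.genl.cmd = CTRL_CMD_GETFAMILY;                /* (3) */
+          req.genl.version = 1;
+
+          attr->nla_type = CTRL_ATTR_FAMILY_NAME;           /* (4) */
+          attr->nla_len = NLA_HDRLEN + name_len;  /* excludes padding */
+          memcpy((char *)attr + NLA_HDRLEN, name, name_len);
+
+          /* total length includes headers and the padded attribute */
+          req.nlh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN) +
+                              NLA_ALIGN(attr->nla_len);
+
+          return send(fd, &req, req.nlh.nlmsg_len, 0) < 0 ? -1 : 0;
+  }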
+
+If the family is found, the kernel will reply with two messages. First,
+the response with all the information about the family::
+
+ /* Message #1 - reply */
+ struct nlmsghdr:
+ __u32 nlmsg_len: 136
+ __u16 nlmsg_type: GENL_ID_CTRL
+ __u16 nlmsg_flags: 0
+ __u32 nlmsg_seq: 1 /* echoed from our request */
+ __u32 nlmsg_pid: 5831 /* The PID of our user space process */
+
+ struct genlmsghdr:
+ __u8 cmd: CTRL_CMD_GETFAMILY
+ __u8 version: 2
+ __u16 reserved: 0
+
+ struct nlattr:
+ __u16 nla_len: 10
+ __u16 nla_type: CTRL_ATTR_FAMILY_NAME
+ char data: test1\0
+
+ (padding:)
+ data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 6
+ __u16 nla_type: CTRL_ATTR_FAMILY_ID
+ __u16: 123 /* The Family ID we are after */
+
+ (padding:)
+ char data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 9
+ __u16 nla_type: CTRL_ATTR_FAMILY_VERSION
+ __u16: 1
+
+ /* ... etc, more attributes will follow. */
+
+And the error code (success) since ``NLM_F_ACK`` had been set on the request::
+
+ /* Message #2 - the ACK */
+ struct nlmsghdr:
+ __u32 nlmsg_len: 36
+ __u16 nlmsg_type: NLMSG_ERROR
+ __u16 nlmsg_flags: NLM_F_CAPPED /* There won't be a payload */
+ __u32 nlmsg_seq: 1 /* echoed from our request */
+ __u32 nlmsg_pid: 5831 /* The PID of our user space process */
+
+ int error: 0
+
+ struct nlmsghdr: /* Copy of the request header as we sent it */
+ __u32 nlmsg_len: 32
+ __u16 nlmsg_type: GENL_ID_CTRL
+ __u16 nlmsg_flags: NLM_F_REQUEST | NLM_F_ACK
+ __u32 nlmsg_seq: 1
+ __u32 nlmsg_pid: 0
+
+The order of attributes (struct nlattr) is not guaranteed so the user
+has to walk the attributes and parse them.
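+
+A sketch of such a walk, extracting ``CTRL_ATTR_FAMILY_ID`` from the reply
+shown above (assuming ``nlh`` points at a complete Generic Netlink message):
+
+.. code-block:: c
+
+  #include <linux/netlink.h>
+  #include <linux/genetlink.h>
+  #include <string.h>
+
+  /* Sketch: walk the attributes and return the Family ID, or -1 */
+  static int find_family_id(struct nlmsghdr *nlh)
+  {
+          int rem = nlh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN);
+          struct nlattr *attr = (struct nlattr *)
+                  ((char *)NLMSG_DATA(nlh) + GENL_HDRLEN);
+
+          while (rem >= (int)sizeof(*attr) &&
+                 attr->nla_len >= sizeof(*attr) && attr->nla_len <= rem) {
+                  if ((attr->nla_type & NLA_TYPE_MASK) == CTRL_ATTR_FAMILY_ID) {
+                          __u16 id;
+
+                          memcpy(&id, (char *)attr + NLA_HDRLEN, sizeof(id));
+                          return id;
+                  }
+                  /* advance by the attribute length rounded up to 4 bytes */
+                  rem -= NLA_ALIGN(attr->nla_len);
+                  attr = (struct nlattr *)((char *)attr +
+                                           NLA_ALIGN(attr->nla_len));
+          }
+          return -1;
+  }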
+
+Note that Generic Netlink sockets are not associated or bound to a single
+family. A socket can be used to exchange messages with many different
+families, selecting the recipient family on a message-by-message basis using
+the :c:member:`nlmsghdr.nlmsg_type` field.
+
+.. _ext_ack:
+
+Extended ACK
+------------
+
+Extended ACK controls reporting of additional error/warning TLVs
+in ``NLMSG_ERROR`` and ``NLMSG_DONE`` messages. To maintain backward
+compatibility this feature has to be explicitly enabled by setting
+the ``NETLINK_EXT_ACK`` setsockopt() to ``1``.
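+
+For example (a sketch, ``fd`` being the Netlink socket):
+
+.. code-block:: c
+
+  int one = 1;
+
+  setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one));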
+
+Types of extended ACK attributes are defined in enum nlmsgerr_attrs.
+The two most commonly used attributes are ``NLMSGERR_ATTR_MSG``
+and ``NLMSGERR_ATTR_OFFS``.
+
+``NLMSGERR_ATTR_MSG`` carries a message in English describing
+the encountered problem. These messages are far more detailed
+than what can be expressed through standard UNIX error codes.
+
+``NLMSGERR_ATTR_OFFS`` points to the attribute which caused the problem.
+
+Extended ACKs can be reported on errors as well as in case of success.
+The latter should be treated as a warning.
+
+Extended ACKs greatly improve the usability of Netlink and should
+always be enabled, appropriately parsed and reported to the user.
+
+Advanced topics
+===============
+
+Dump consistency
+----------------
+
+Some of the data structures the kernel uses for storing objects make
+it hard to provide an atomic snapshot of all the objects in a dump
+(without impacting the fast-paths updating them).
+
+The kernel may set the ``NLM_F_DUMP_INTR`` flag on any message in a dump
+(including the ``NLMSG_DONE`` message) if the dump was interrupted and
+may be inconsistent (e.g. missing objects). User space should retry
+the dump if it sees the flag set.
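+
+A sketch of the check, applied to every message of a dump (``nlh`` is the
+header of the received message, ``dump_interrupted`` a hypothetical flag
+kept by the application):
+
+.. code-block:: c
+
+  if (nlh->nlmsg_flags & NLM_F_DUMP_INTR) {
+          /* objects may be missing or duplicated - restart the dump */
+          dump_interrupted = true;
+  }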
+
+Introspection
+-------------
+
+The basic introspection abilities are enabled by access to the Family
+object as reported in :ref:`res_fam`. Users can query information about
+the Generic Netlink family, including which operations are supported
+by the kernel and what attributes the kernel understands.
+Family information includes the highest ID of an attribute the kernel can
+parse; a separate command (``CTRL_CMD_GETPOLICY``) provides detailed
+information about supported attributes, including the ranges of values
+the kernel accepts.
+
+Querying family information is useful in cases when user space needs
+to make sure that the kernel has support for a feature before issuing
+a request.
+
+.. _nlmsg_pid:
+
+nlmsg_pid
+---------
+
+:c:member:`nlmsghdr.nlmsg_pid` is the Netlink equivalent of an address.
+It is referred to as Port ID, or sometimes Process ID, because for
+historical reasons, if the application does not select (bind() to) an
+explicit Port ID, the kernel will automatically assign it an ID equal to
+its Process ID (as reported by the getpid() system call).
+
+Similarly to the bind() semantics of the TCP/IP network protocols the value
+of zero means "assign automatically", hence it is common for applications
+to leave the :c:member:`nlmsghdr.nlmsg_pid` field initialized to ``0``.
+
+The field is still used today in rare cases when the kernel needs to send
+a unicast notification. A user space application can use bind() to
+associate its socket with a specific PID and then communicate that PID to
+the kernel. This way the kernel can reach the specific user space process.
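+
+A sketch of binding a socket to an explicit Port ID (here simply the
+Process ID, but any unique non-zero value works):
+
+.. code-block:: c
+
+  struct sockaddr_nl addr = {
+          .nl_family = AF_NETLINK,
+          .nl_pid = getpid(),     /* explicit, non-zero Port ID */
+  };
+
+  bind(fd, (struct sockaddr *)&addr, sizeof(addr));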
+
+This sort of communication is utilized in UMH (User Mode Helper)-like
+scenarios when kernel needs to trigger user space processing or ask user
+space for a policy decision.
+
+Multicast notifications
+-----------------------
+
+One of the strengths of Netlink is the ability to send event notifications
+to user space. This is a unidirectional form of communication (kernel ->
+user) and does not involve any control messages like ``NLMSG_ERROR`` or
+``NLMSG_DONE``.
+
+For example the Generic Netlink family itself defines a set of multicast
+notifications about registered families. When a new family is added the
+sockets subscribed to the notifications will get the following message::
+
+ struct nlmsghdr:
+ __u32 nlmsg_len: 136
+ __u16 nlmsg_type: GENL_ID_CTRL
+ __u16 nlmsg_flags: 0
+ __u32 nlmsg_seq: 0
+ __u32 nlmsg_pid: 0
+
+ struct genlmsghdr:
+ __u8 cmd: CTRL_CMD_NEWFAMILY
+ __u8 version: 2
+ __u16 reserved: 0
+
+ struct nlattr:
+ __u16 nla_len: 10
+ __u16 nla_type: CTRL_ATTR_FAMILY_NAME
+ char data: test1\0
+
+ (padding:)
+ data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 6
+ __u16 nla_type: CTRL_ATTR_FAMILY_ID
+ __u16: 123 /* The Family ID we are after */
+
+ (padding:)
+ char data: \0\0
+
+ struct nlattr:
+ __u16 nla_len: 9
+ __u16 nla_type: CTRL_ATTR_FAMILY_VERSION
+ __u16: 1
+
+ /* ... etc, more attributes will follow. */
+
+The notification contains the same information as the response
+to the ``CTRL_CMD_GETFAMILY`` request.
+
+The Netlink headers of the notification are mostly 0 and irrelevant.
+The :c:member:`nlmsghdr.nlmsg_seq` may be either zero or a monotonically
+increasing notification sequence number maintained by the family.
+
+To receive notifications the user socket must subscribe to the relevant
+notification group. Much like the Family ID, the Group ID for a given
+multicast group is dynamic and can be found inside the Family information.
+The ``CTRL_ATTR_MCAST_GROUPS`` attribute contains nests with names
+(``CTRL_ATTR_MCAST_GRP_NAME``) and IDs (``CTRL_ATTR_MCAST_GRP_ID``) of
+the family's groups.
+
+Once the Group ID is known a setsockopt() call adds the socket to the group:
+
+.. code-block:: c
+
+ unsigned int group_id;
+
+ /* .. find the group ID... */
+
+ setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ &group_id, sizeof(group_id));
+
+The socket will now receive notifications.
+
+It is recommended to use separate sockets for receiving notifications
+and sending requests to the kernel. The asynchronous nature of notifications
+means that they may get mixed in with the responses, making message
+handling much harder.
+
+Buffer sizing
+-------------
+
+Netlink sockets are datagram sockets rather than stream sockets,
+meaning that each message must be received in its entirety by a single
+recv()/recvmsg() system call. If the buffer provided by the user is too
+short, the message will be truncated and the ``MSG_TRUNC`` flag set
+in struct msghdr (struct msghdr is the second argument
+of the recvmsg() system call, *not* a Netlink header).
+
+Upon truncation the remaining part of the message is discarded.
+
+Netlink expects that the user buffer will be at least 8kB or the page
+size of the CPU architecture, whichever is bigger. Particular Netlink
+families may, however, require a larger buffer. A 32kB buffer is
+recommended for the most efficient handling of dumps (a larger buffer fits
+more dumped objects and therefore fewer recvmsg() calls are needed).
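+
+A sketch of receiving with the recommended buffer size and detecting
+truncation:
+
+.. code-block:: c
+
+  char buf[32 * 1024];
+  struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
+  struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
+  ssize_t n;
+
+  n = recvmsg(fd, &msg, 0);
+  if (n > 0 && (msg.msg_flags & MSG_TRUNC)) {
+          /* the buffer was too small, the rest of the message is gone */
+  }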
+
+Classic Netlink
+===============
+
+The main differences between Classic and Generic Netlink are the dynamic
+allocation of subsystem identifiers and availability of introspection.
+In theory the protocol does not differ significantly; in practice, however,
+Classic Netlink experimented with concepts which were abandoned in Generic
+Netlink (really, they usually only found use in a small corner of a single
+subsystem). This section is meant as an explainer of a few such concepts,
+with the explicit goal of giving Generic Netlink users the confidence
+to ignore them when reading the uAPI headers.
+
+Most of the concepts and examples here refer to the ``NETLINK_ROUTE`` family,
+which covers much of the configuration of the Linux networking stack.
+Real documentation of that family deserves a chapter (or a book) of its own.
+
+Families
+--------
+
+Netlink refers to subsystems as families. This is a remnant of using
+sockets and the concept of protocol families, which are part of message
+demultiplexing in ``NETLINK_ROUTE``.
+
+Sadly, every layer of encapsulation likes to refer to whatever it's carrying
+as "families", making the term very confusing:
+
+ 1. AF_NETLINK is a bona fide socket protocol family
+ 2. AF_NETLINK's documentation refers to what comes after its own
+ header (struct nlmsghdr) in a message as a "Family Header"
+ 3. Generic Netlink is a family for AF_NETLINK (struct genlmsghdr follows
+ struct nlmsghdr), yet it also calls its users "Families".
+
+Note that the Generic Netlink Family IDs are in a different "ID space"
+and may overlap with Classic Netlink protocol numbers (e.g. ``NETLINK_CRYPTO``
+has the Classic Netlink protocol ID of 21, which Generic Netlink will
+happily allocate to one of its families as well).
+
+Strict checking
+---------------
+
+The ``NETLINK_GET_STRICT_CHK`` socket option enables strict input checking
+in ``NETLINK_ROUTE``. It was needed because historically the kernel did not
+validate the fields of structures it didn't process. This made it impossible
+to start using those fields later without risking regressions in applications
+which initialized them incorrectly or not at all.
+
+``NETLINK_GET_STRICT_CHK`` declares that the application is initializing
+all fields correctly. It also opts into validating that the message does not
+contain trailing data and requests that the kernel reject attributes with
+a type higher than the largest attribute type known to the kernel.
+
+``NETLINK_GET_STRICT_CHK`` is not used outside of ``NETLINK_ROUTE``.
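+
+On a ``NETLINK_ROUTE`` socket it is enabled like the other socket options
+(a sketch):
+
+.. code-block:: c
+
+  int one = 1;
+
+  setsockopt(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &one, sizeof(one));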
+
+Unknown attributes
+------------------
+
+Historically Netlink ignored all unknown attributes. The thinking was that
+it would free the application from having to probe what the kernel supports.
+The application could make a request to change the state and check which
+parts of the request "stuck".
+
+This is no longer the case for new Generic Netlink families and those opting
+in to strict checking. See enum netlink_validation for validation types
+performed.
+
+Fixed metadata and structures
+-----------------------------
+
+Classic Netlink made liberal use of fixed-format structures within
+the messages. Messages would commonly have a structure with
+a considerable number of fields after struct nlmsghdr. It was also
+common to put structures with multiple members inside attributes,
+without breaking each member into an attribute of its own.
+
+This has caused problems with validation and extensibility and
+therefore using binary structures is actively discouraged for new
+attributes.
+
+Request types
+-------------
+
+``NETLINK_ROUTE`` categorized requests into 4 types: ``NEW``, ``DEL``, ``GET``,
+and ``SET``. Each object can handle all or some of those requests
+(objects being netdevs, routes, addresses, qdiscs etc.) Request type
+is defined by the 2 lowest bits of the message type, so commands for
+new objects would always be allocated with a stride of 4.
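+
+For instance, the link and address commands in ``linux/rtnetlink.h`` follow
+this pattern (abridged here for illustration):
+
+.. code-block:: c
+
+  enum {
+          RTM_NEWLINK = 16,   /* low bits 00 - NEW */
+          RTM_DELLINK,        /* 17, low bits 01 - DEL */
+          RTM_GETLINK,        /* 18, low bits 10 - GET */
+          RTM_SETLINK,        /* 19, low bits 11 - SET */
+
+          RTM_NEWADDR = 20,   /* next object starts 4 later */
+          RTM_DELADDR,
+          RTM_GETADDR,
+          /* ... */
+  };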
+
+Each object would also have its own fixed metadata shared by all request
+types (e.g. struct ifinfomsg for netdev requests, struct ifaddrmsg for address
+requests, struct tcmsg for qdisc requests).
+
+Even though other protocols and Generic Netlink commands often use
+the same verbs in their message names (``GET``, ``SET``) the concept
+of request types did not find wider adoption.
+
+Message flags
+-------------
+
+The earlier section has already covered the basic request flags
+(``NLM_F_REQUEST``, ``NLM_F_ACK``, ``NLM_F_DUMP``) and the ``NLMSG_ERROR`` /
+``NLMSG_DONE`` flags (``NLM_F_CAPPED``, ``NLM_F_ACK_TLVS``).
+Dump flags were also mentioned (``NLM_F_MULTI``, ``NLM_F_DUMP_INTR``).
+
+Those are the main flags of note; with one small exception (``ieee802154``)
+Generic Netlink does not make use of other flags. If the protocol needs
+to communicate special constraints for a request, it should use
+an attribute, not the flags in struct nlmsghdr.
+
+Classic Netlink, however, defined various flags for its ``GET``, ``NEW``
+and ``DEL`` requests. Since request types have not been generalized,
+the request-type-specific flags should not be used either.
+
+uAPI reference
+==============
+
+.. kernel-doc:: include/uapi/linux/netlink.h
diff --git a/MAINTAINERS b/MAINTAINERS
index af4848466a08..ba954d5a9a2f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5721,13 +5721,6 @@ F: include/linux/tfrc.h
F: include/uapi/linux/dccp.h
F: net/dccp/
-DECnet NETWORK LAYER
-L: linux-decnet-user@lists.sourceforge.net
-S: Orphan
-W: http://linux-decnet.sourceforge.net
-F: Documentation/networking/decnet.rst
-F: net/decnet/
-
DECSTATION PLATFORM SUPPORT
M: "Maciej W. Rozycki" <macro@orcam.me.uk>
L: linux-mips@vger.kernel.org
diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
index 0021427a1bbe..4044f2829759 100644
--- a/arch/mips/configs/decstation_64_defconfig
+++ b/arch/mips/configs/decstation_64_defconfig
@@ -53,8 +53,6 @@ CONFIG_IPV6_SUBTREES=y
CONFIG_NETWORK_SECMARK=y
CONFIG_IP_SCTP=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
# CONFIG_WIRELESS is not set
# CONFIG_UEVENT_HELPER is not set
# CONFIG_FW_LOADER is not set
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 7a97a0818ce4..157fc57520a7 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -49,8 +49,6 @@ CONFIG_IPV6_SUBTREES=y
CONFIG_NETWORK_SECMARK=y
CONFIG_IP_SCTP=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
# CONFIG_WIRELESS is not set
# CONFIG_UEVENT_HELPER is not set
# CONFIG_FW_LOADER is not set
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index a0643363526d..f73c26ebfc83 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -48,8 +48,6 @@ CONFIG_IPV6_SUBTREES=y
CONFIG_NETWORK_SECMARK=y
CONFIG_IP_SCTP=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
# CONFIG_WIRELESS is not set
# CONFIG_UEVENT_HELPER is not set
# CONFIG_FW_LOADER is not set
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index d82f4ebf687f..ce8a444957c1 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -69,7 +69,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_DECNET_NF_GRABULATOR=m
CONFIG_BRIDGE_NF_EBTABLES=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_T_FILTER=m
@@ -99,7 +98,6 @@ CONFIG_ATM_MPOA=m
CONFIG_ATM_BR2684=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
CONFIG_LLC2=m
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 4194e79b435c..1339c062a042 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -116,7 +116,6 @@ CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_RAW=m
-CONFIG_DECNET_NF_GRABULATOR=m
CONFIG_BRIDGE_NF_EBTABLES=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_T_FILTER=m
@@ -146,7 +145,6 @@ CONFIG_ATM_MPOA=m
CONFIG_ATM_BR2684=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
CONFIG_LLC2=m
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 7d6f235e8ccb..04c681bd716e 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -116,7 +116,6 @@ CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_RAW=m
-CONFIG_DECNET_NF_GRABULATOR=m
CONFIG_BRIDGE_NF_EBTABLES=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_T_FILTER=m
@@ -137,7 +136,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE=m
-CONFIG_DECNET=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 91967824272e..a24f484bfbd2 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -243,8 +243,6 @@ CONFIG_ATM_LANE=m
CONFIG_ATM_BR2684=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
CONFIG_IPDDP=m
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index e69c4bf557bf..ae24848af233 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -798,7 +798,7 @@ u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN])
u16 ret;
if (contr == 0) {
- strlcpy(serial, driver_serial, CAPI_SERIAL_LEN);
+ strscpy(serial, driver_serial, CAPI_SERIAL_LEN);
return CAPI_NOERROR;
}
@@ -806,7 +806,7 @@ u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN])
ctr = get_capi_ctr_by_nr(contr);
if (ctr && ctr->state == CAPI_CTR_RUNNING) {
- strlcpy(serial, ctr->serial, CAPI_SERIAL_LEN);
+ strscpy(serial, ctr->serial, CAPI_SERIAL_LEN);
ret = CAPI_NOERROR;
} else
ret = CAPI_REGNOTINSTALLED;
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index 9a247eb7679c..2d20be6ffb7e 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -2894,8 +2894,7 @@ static void amt_event_work(struct work_struct *work)
amt_event_send_request(amt);
break;
default:
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
break;
}
}
@@ -3033,8 +3032,7 @@ static int amt_dev_stop(struct net_device *dev)
cancel_work_sync(&amt->event_wq);
for (i = 0; i < AMT_MAX_EVENTS; i++) {
skb = amt->events[i].skb;
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
amt->events[i].event = AMT_EVENT_NONE;
amt->events[i].skb = NULL;
}
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index be0edfa093d0..572f7450b527 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -94,6 +94,24 @@ static u16 bcm_sf2_reg_led_base(struct bcm_sf2_priv *priv, int port)
return REG_SWITCH_STATUS;
}
+static u32 bcm_sf2_port_override_offset(struct bcm_sf2_priv *priv, int port)
+{
+ switch (priv->type) {
+ case BCM4908_DEVICE_ID:
+ case BCM7445_DEVICE_ID:
+ return port == 8 ? CORE_STS_OVERRIDE_IMP :
+ CORE_STS_OVERRIDE_GMIIP_PORT(port);
+ case BCM7278_DEVICE_ID:
+ return port == 8 ? CORE_STS_OVERRIDE_IMP2 :
+ CORE_STS_OVERRIDE_GMIIP2_PORT(port);
+ default:
+ WARN_ONCE(1, "Unsupported device: %d\n", priv->type);
+ }
+
+ /* RO fallback register */
+ return REG_SWITCH_STATUS;
+}
+
/* Return the number of active ports, not counting the IMP (CPU) port */
static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds)
{
@@ -141,7 +159,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
unsigned int i;
- u32 reg, offset;
+ u32 reg;
/* Enable the port memories */
reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
@@ -167,21 +185,6 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
b53_brcm_hdr_setup(ds, port);
if (port == 8) {
- if (priv->type == BCM4908_DEVICE_ID ||
- priv->type == BCM7445_DEVICE_ID)
- offset = CORE_STS_OVERRIDE_IMP;
- else
- offset = CORE_STS_OVERRIDE_IMP2;
-
- /* Force link status for IMP port */
- reg = core_readl(priv, offset);
- reg |= (MII_SW_OR | LINK_STS);
- if (priv->type == BCM4908_DEVICE_ID)
- reg |= GMII_SPEED_UP_2G;
- else
- reg &= ~GMII_SPEED_UP_2G;
- core_writel(priv, reg, offset);
-
/* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
reg = core_readl(priv, CORE_IMP_CTL);
reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
@@ -812,17 +815,10 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
if (priv->wol_ports_mask & BIT(port))
return;
- if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
- if (priv->type == BCM4908_DEVICE_ID ||
- priv->type == BCM7445_DEVICE_ID)
- offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
- else
- offset = CORE_STS_OVERRIDE_GMIIP2_PORT(port);
-
- reg = core_readl(priv, offset);
- reg &= ~LINK_STS;
- core_writel(priv, reg, offset);
- }
+ offset = bcm_sf2_port_override_offset(priv, port);
+ reg = core_readl(priv, offset);
+ reg &= ~LINK_STS;
+ core_writel(priv, reg, offset);
bcm_sf2_sw_mac_link_set(ds, port, interface, false);
}
@@ -836,56 +832,56 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
struct ethtool_eee *p = &priv->dev->ports[port].eee;
+ u32 reg_rgmii_ctrl = 0;
+ u32 reg, offset;
bcm_sf2_sw_mac_link_set(ds, port, interface, true);
- if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
- u32 reg_rgmii_ctrl = 0;
- u32 reg, offset;
+ offset = bcm_sf2_port_override_offset(priv, port);
- if (priv->type == BCM4908_DEVICE_ID ||
- priv->type == BCM7445_DEVICE_ID)
- offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
- else
- offset = CORE_STS_OVERRIDE_GMIIP2_PORT(port);
-
- if (interface == PHY_INTERFACE_MODE_RGMII ||
- interface == PHY_INTERFACE_MODE_RGMII_TXID ||
- interface == PHY_INTERFACE_MODE_MII ||
- interface == PHY_INTERFACE_MODE_REVMII) {
- reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
- reg = reg_readl(priv, reg_rgmii_ctrl);
- reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
-
- if (tx_pause)
- reg |= TX_PAUSE_EN;
- if (rx_pause)
- reg |= RX_PAUSE_EN;
-
- reg_writel(priv, reg, reg_rgmii_ctrl);
- }
-
- reg = SW_OVERRIDE | LINK_STS;
- switch (speed) {
- case SPEED_1000:
- reg |= SPDSTS_1000 << SPEED_SHIFT;
- break;
- case SPEED_100:
- reg |= SPDSTS_100 << SPEED_SHIFT;
- break;
- }
-
- if (duplex == DUPLEX_FULL)
- reg |= DUPLX_MODE;
+ if (phy_interface_mode_is_rgmii(interface) ||
+ interface == PHY_INTERFACE_MODE_MII ||
+ interface == PHY_INTERFACE_MODE_REVMII) {
+ reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+ reg = reg_readl(priv, reg_rgmii_ctrl);
+ reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
if (tx_pause)
- reg |= TXFLOW_CNTL;
+ reg |= TX_PAUSE_EN;
if (rx_pause)
- reg |= RXFLOW_CNTL;
+ reg |= RX_PAUSE_EN;
+
+ reg_writel(priv, reg, reg_rgmii_ctrl);
+ }
+
+ reg = LINK_STS;
+ if (port == 8) {
+ if (priv->type == BCM4908_DEVICE_ID)
+ reg |= GMII_SPEED_UP_2G;
+ reg |= MII_SW_OR;
+ } else {
+ reg |= SW_OVERRIDE;
+ }
- core_writel(priv, reg, offset);
+ switch (speed) {
+ case SPEED_1000:
+ reg |= SPDSTS_1000 << SPEED_SHIFT;
+ break;
+ case SPEED_100:
+ reg |= SPDSTS_100 << SPEED_SHIFT;
+ break;
}
+ if (duplex == DUPLEX_FULL)
+ reg |= DUPLX_MODE;
+
+ if (tx_pause)
+ reg |= TXFLOW_CNTL;
+ if (rx_pause)
+ reg |= RXFLOW_CNTL;
+
+ core_writel(priv, reg, offset);
+
if (mode == MLO_AN_PHY && phydev)
p->eee_enabled = b53_eee_init(ds, port, phydev);
}
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index aadb0bd7c24f..ee19ed96f284 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -445,6 +445,9 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds)
if (err)
return err;
+ dsa_switch_for_each_cpu_port(dp, ds)
+ ocelot_port_setup_dsa_8021q_cpu(ocelot, dp->index);
+
dsa_switch_for_each_user_port(dp, ds)
ocelot_port_assign_dsa_8021q_cpu(ocelot, dp->index,
dp->cpu_dp->index);
@@ -493,6 +496,9 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds)
dsa_switch_for_each_user_port(dp, ds)
ocelot_port_unassign_dsa_8021q_cpu(ocelot, dp->index);
+ dsa_switch_for_each_cpu_port(dp, ds)
+ ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index);
+
dsa_tag_8021q_unregister(ds);
}
diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c
index 4299f1301149..3081e5874ac5 100644
--- a/drivers/net/ethernet/altera/altera_tse_ethtool.c
+++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c
@@ -233,6 +233,7 @@ static const struct ethtool_ops tse_ethtool_ops = {
.set_msglevel = tse_set_msglevel,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
+ .get_ts_info = ethtool_op_get_ts_info,
};
void altera_tse_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index 23bbece6b7de..147fe03ca979 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -87,6 +87,7 @@ struct tsnep_rx_entry {
struct tsnep_rx {
struct tsnep_adapter *adapter;
void __iomem *addr;
+ int queue_index;
void *page[TSNEP_RING_PAGE_COUNT];
dma_addr_t page_dma[TSNEP_RING_PAGE_COUNT];
diff --git a/drivers/net/ethernet/engleder/tsnep_hw.h b/drivers/net/ethernet/engleder/tsnep_hw.h
index 916ceac3ada2..e03aaafab559 100644
--- a/drivers/net/ethernet/engleder/tsnep_hw.h
+++ b/drivers/net/ethernet/engleder/tsnep_hw.h
@@ -92,8 +92,7 @@
/* tsnep register */
#define TSNEP_INFO 0x0100
-#define TSNEP_INFO_RX_ASSIGN 0x00010000
-#define TSNEP_INFO_TX_TIME 0x00020000
+#define TSNEP_INFO_TX_TIME 0x00010000
#define TSNEP_CONTROL 0x0108
#define TSNEP_CONTROL_TX_RESET 0x00000001
#define TSNEP_CONTROL_TX_ENABLE 0x00000002
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index a5f7152a1716..19db8b1dddc4 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -124,30 +124,51 @@ static int tsnep_mdiobus_write(struct mii_bus *bus, int addr, int regnum,
return 0;
}
+static void tsnep_set_link_mode(struct tsnep_adapter *adapter)
+{
+ u32 mode;
+
+ switch (adapter->phydev->speed) {
+ case SPEED_100:
+ mode = ECM_LINK_MODE_100;
+ break;
+ case SPEED_1000:
+ mode = ECM_LINK_MODE_1000;
+ break;
+ default:
+ mode = ECM_LINK_MODE_OFF;
+ break;
+ }
+ iowrite32(mode, adapter->addr + ECM_STATUS);
+}
+
static void tsnep_phy_link_status_change(struct net_device *netdev)
{
struct tsnep_adapter *adapter = netdev_priv(netdev);
struct phy_device *phydev = netdev->phydev;
- u32 mode;
- if (phydev->link) {
- switch (phydev->speed) {
- case SPEED_100:
- mode = ECM_LINK_MODE_100;
- break;
- case SPEED_1000:
- mode = ECM_LINK_MODE_1000;
- break;
- default:
- mode = ECM_LINK_MODE_OFF;
- break;
- }
- iowrite32(mode, adapter->addr + ECM_STATUS);
- }
+ if (phydev->link)
+ tsnep_set_link_mode(adapter);
phy_print_status(netdev->phydev);
}
+static int tsnep_phy_loopback(struct tsnep_adapter *adapter, bool enable)
+{
+ int retval;
+
+ retval = phy_loopback(adapter->phydev, enable);
+
+ /* PHY link state change is not signaled if loopback is enabled, it
+ * would delay a working loopback anyway, let's ensure that loopback
+ * is working immediately by setting link mode directly
+ */
+ if (!retval && enable)
+ tsnep_set_link_mode(adapter);
+
+ return retval;
+}
+
static int tsnep_phy_open(struct tsnep_adapter *adapter)
{
struct phy_device *phydev;
@@ -241,14 +262,14 @@ alloc_failed:
return retval;
}
-static void tsnep_tx_activate(struct tsnep_tx *tx, int index, bool last)
+static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
+ bool last)
{
struct tsnep_tx_entry *entry = &tx->entry[index];
entry->properties = 0;
if (entry->skb) {
- entry->properties =
- skb_pagelen(entry->skb) & TSNEP_DESC_LENGTH_MASK;
+ entry->properties = length & TSNEP_DESC_LENGTH_MASK;
entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
if (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)
entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG;
@@ -313,6 +334,7 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
struct tsnep_tx_entry *entry;
unsigned int len;
dma_addr_t dma;
+ int map_len = 0;
int i;
for (i = 0; i < count; i++) {
@@ -335,15 +357,18 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
dma_unmap_addr_set(entry, dma, dma);
entry->desc->tx = __cpu_to_le64(dma);
+
+ map_len += len;
}
- return 0;
+ return map_len;
}
-static void tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
+static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
{
struct device *dmadev = tx->adapter->dmadev;
struct tsnep_tx_entry *entry;
+ int map_len = 0;
int i;
for (i = 0; i < count; i++) {
@@ -360,9 +385,12 @@ static void tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
dma_unmap_addr(entry, dma),
dma_unmap_len(entry, len),
DMA_TO_DEVICE);
+ map_len += entry->len;
entry->len = 0;
}
}
+
+ return map_len;
}
static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
@@ -371,6 +399,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
unsigned long flags;
int count = 1;
struct tsnep_tx_entry *entry;
+ int length;
int i;
int retval;
@@ -394,7 +423,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
entry->skb = skb;
retval = tsnep_tx_map(skb, tx, count);
- if (retval != 0) {
+ if (retval < 0) {
tsnep_tx_unmap(tx, tx->write, count);
dev_kfree_skb_any(entry->skb);
entry->skb = NULL;
@@ -407,12 +436,13 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_OK;
}
+ length = retval;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
for (i = 0; i < count; i++)
- tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE,
+ tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length,
i == (count - 1));
tx->write = (tx->write + count) % TSNEP_RING_SIZE;
@@ -428,9 +458,6 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
netif_stop_queue(tx->adapter->netdev);
}
- tx->packets++;
- tx->bytes += skb_pagelen(entry->skb) + ETH_FCS_LEN;
-
spin_unlock_irqrestore(&tx->lock, flags);
return NETDEV_TX_OK;
@@ -442,6 +469,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
int budget = 128;
struct tsnep_tx_entry *entry;
int count;
+ int length;
spin_lock_irqsave(&tx->lock, flags);
@@ -464,7 +492,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
if (skb_shinfo(entry->skb)->nr_frags > 0)
count += skb_shinfo(entry->skb)->nr_frags;
- tsnep_tx_unmap(tx, tx->read, count);
+ length = tsnep_tx_unmap(tx, tx->read, count);
if ((skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) &&
(__le32_to_cpu(entry->desc_wb->properties) &
@@ -491,6 +519,9 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
tx->read = (tx->read + count) % TSNEP_RING_SIZE;
+ tx->packets++;
+ tx->bytes += length + ETH_FCS_LEN;
+
budget--;
} while (likely(budget));
@@ -718,6 +749,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
hwtstamps->netdev_data = rx_inline;
}
skb_pull(skb, TSNEP_RX_INLINE_METADATA_SIZE);
+ skb_record_rx_queue(skb, rx->queue_index);
skb->protocol = eth_type_trans(skb,
rx->adapter->netdev);
@@ -752,7 +784,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
}
static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
- struct tsnep_rx *rx)
+ int queue_index, struct tsnep_rx *rx)
{
dma_addr_t dma;
int i;
@@ -761,6 +793,7 @@ static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
memset(rx, 0, sizeof(*rx));
rx->adapter = adapter;
rx->addr = addr;
+ rx->queue_index = queue_index;
retval = tsnep_rx_ring_init(rx);
if (retval)
@@ -847,6 +880,7 @@ static int tsnep_netdev_open(struct net_device *netdev)
if (adapter->queue[i].rx) {
addr = adapter->addr + TSNEP_QUEUE(rx_queue_index);
retval = tsnep_rx_open(adapter, addr,
+ rx_queue_index,
adapter->queue[i].rx);
if (retval)
goto failed;
@@ -1017,6 +1051,22 @@ static int tsnep_netdev_set_mac_address(struct net_device *netdev, void *addr)
return 0;
}
+static int tsnep_netdev_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct tsnep_adapter *adapter = netdev_priv(netdev);
+ netdev_features_t changed = netdev->features ^ features;
+ bool enable;
+ int retval = 0;
+
+ if (changed & NETIF_F_LOOPBACK) {
+ enable = !!(features & NETIF_F_LOOPBACK);
+ retval = tsnep_phy_loopback(adapter, enable);
+ }
+
+ return retval;
+}
+
static ktime_t tsnep_netdev_get_tstamp(struct net_device *netdev,
const struct skb_shared_hwtstamps *hwtstamps,
bool cycles)
@@ -1038,9 +1088,9 @@ static const struct net_device_ops tsnep_netdev_ops = {
.ndo_start_xmit = tsnep_netdev_xmit_frame,
.ndo_eth_ioctl = tsnep_netdev_ioctl,
.ndo_set_rx_mode = tsnep_netdev_set_multicast,
-
.ndo_get_stats64 = tsnep_netdev_get_stats64,
.ndo_set_mac_address = tsnep_netdev_set_mac_address,
+ .ndo_set_features = tsnep_netdev_set_features,
.ndo_get_tstamp = tsnep_netdev_get_tstamp,
.ndo_setup_tc = tsnep_tc_setup,
};
@@ -1192,6 +1242,13 @@ static int tsnep_probe(struct platform_device *pdev)
adapter->queue[0].rx = &adapter->rx[0];
adapter->queue[0].irq_mask = ECM_INT_TX_0 | ECM_INT_RX_0;
+ retval = dma_set_mask_and_coherent(&adapter->pdev->dev,
+ DMA_BIT_MASK(64));
+ if (retval) {
+ dev_err(&adapter->pdev->dev, "no usable DMA configuration.\n");
+ return retval;
+ }
+
tsnep_disable_irq(adapter, ECM_INT_ALL);
retval = devm_request_irq(&adapter->pdev->dev, adapter->irq, tsnep_irq,
0, TSNEP, adapter);
@@ -1225,7 +1282,7 @@ static int tsnep_probe(struct platform_device *pdev)
netdev->netdev_ops = &tsnep_netdev_ops;
netdev->ethtool_ops = &tsnep_ethtool_ops;
netdev->features = NETIF_F_SG;
- netdev->hw_features = netdev->features;
+ netdev->hw_features = netdev->features | NETIF_F_LOOPBACK;
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 8a341e2d5833..2e6524009b19 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1075,6 +1075,7 @@ static int ftmac100_probe(struct platform_device *pdev)
SET_NETDEV_DEV(netdev, &pdev->dev);
netdev->ethtool_ops = &ftmac100_ethtool_ops;
netdev->netdev_ops = &ftmac100_netdev_ops;
+ netdev->max_mtu = MAX_PKT_SIZE;
platform_set_drvdata(pdev, netdev);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index a770bab4d1ed..e974d90f15e3 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -288,9 +288,11 @@ static int dpaa_stop(struct net_device *net_dev)
*/
msleep(200);
- err = mac_dev->stop(mac_dev);
+ if (mac_dev->phy_dev)
+ phy_stop(mac_dev->phy_dev);
+ err = mac_dev->disable(mac_dev->fman_mac);
if (err < 0)
- netif_err(priv, ifdown, net_dev, "mac_dev->stop() = %d\n",
+ netif_err(priv, ifdown, net_dev, "mac_dev->disable() = %d\n",
err);
for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
@@ -2946,11 +2948,12 @@ static int dpaa_open(struct net_device *net_dev)
goto mac_start_failed;
}
- err = priv->mac_dev->start(mac_dev);
+ err = priv->mac_dev->enable(mac_dev->fman_mac);
if (err < 0) {
- netif_err(priv, ifup, net_dev, "mac_dev->start() = %d\n", err);
+ netif_err(priv, ifup, net_dev, "mac_dev->enable() = %d\n", err);
goto mac_start_failed;
}
+ phy_start(priv->mac_dev->phy_dev);
netif_tx_start_all_queues(net_dev);
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 8f0db61cb1f6..9d85fb136e34 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -1,34 +1,7 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
* Copyright 2020 NXP
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h
index f2ede1360f03..2ea575a46675 100644
--- a/drivers/net/ethernet/freescale/fman/fman.h
+++ b/drivers/net/ethernet/freescale/fman/fman.h
@@ -1,34 +1,7 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
* Copyright 2020 NXP
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FM_H
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index 1950a8936bc0..7f4f3d797a8d 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -860,53 +833,45 @@ int dtsec_cfg_pad_and_crc(struct fman_mac *dtsec, bool new_val)
return 0;
}
-static void graceful_start(struct fman_mac *dtsec, enum comm_mode mode)
+static void graceful_start(struct fman_mac *dtsec)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
- if (mode & COMM_MODE_TX)
- iowrite32be(ioread32be(&regs->tctrl) &
- ~TCTRL_GTS, &regs->tctrl);
- if (mode & COMM_MODE_RX)
- iowrite32be(ioread32be(&regs->rctrl) &
- ~RCTRL_GRS, &regs->rctrl);
+ iowrite32be(ioread32be(&regs->tctrl) & ~TCTRL_GTS, &regs->tctrl);
+ iowrite32be(ioread32be(&regs->rctrl) & ~RCTRL_GRS, &regs->rctrl);
}
-static void graceful_stop(struct fman_mac *dtsec, enum comm_mode mode)
+static void graceful_stop(struct fman_mac *dtsec)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
u32 tmp;
/* Graceful stop - Assert the graceful Rx stop bit */
- if (mode & COMM_MODE_RX) {
- tmp = ioread32be(&regs->rctrl) | RCTRL_GRS;
- iowrite32be(tmp, &regs->rctrl);
+ tmp = ioread32be(&regs->rctrl) | RCTRL_GRS;
+ iowrite32be(tmp, &regs->rctrl);
- if (dtsec->fm_rev_info.major == 2) {
- /* Workaround for dTSEC Errata A002 */
- usleep_range(100, 200);
- } else {
- /* Workaround for dTSEC Errata A004839 */
- usleep_range(10, 50);
- }
+ if (dtsec->fm_rev_info.major == 2) {
+ /* Workaround for dTSEC Errata A002 */
+ usleep_range(100, 200);
+ } else {
+ /* Workaround for dTSEC Errata A004839 */
+ usleep_range(10, 50);
}
/* Graceful stop - Assert the graceful Tx stop bit */
- if (mode & COMM_MODE_TX) {
- if (dtsec->fm_rev_info.major == 2) {
- /* dTSEC Errata A004: Do not use TCTRL[GTS]=1 */
- pr_debug("GTS not supported due to DTSEC_A004 Errata.\n");
- } else {
- tmp = ioread32be(&regs->tctrl) | TCTRL_GTS;
- iowrite32be(tmp, &regs->tctrl);
+ if (dtsec->fm_rev_info.major == 2) {
+ /* dTSEC Errata A004: Do not use TCTRL[GTS]=1 */
+ pr_debug("GTS not supported due to DTSEC_A004 Errata.\n");
+ } else {
+ tmp = ioread32be(&regs->tctrl) | TCTRL_GTS;
+ iowrite32be(tmp, &regs->tctrl);
- /* Workaround for dTSEC Errata A0012, A0014 */
- usleep_range(10, 50);
- }
+ /* Workaround for dTSEC Errata A0012, A0014 */
+ usleep_range(10, 50);
}
}
-int dtsec_enable(struct fman_mac *dtsec, enum comm_mode mode)
+int dtsec_enable(struct fman_mac *dtsec)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
u32 tmp;
@@ -916,20 +881,16 @@ int dtsec_enable(struct fman_mac *dtsec, enum comm_mode mode)
/* Enable */
tmp = ioread32be(&regs->maccfg1);
- if (mode & COMM_MODE_RX)
- tmp |= MACCFG1_RX_EN;
- if (mode & COMM_MODE_TX)
- tmp |= MACCFG1_TX_EN;
-
+ tmp |= MACCFG1_RX_EN | MACCFG1_TX_EN;
iowrite32be(tmp, &regs->maccfg1);
/* Graceful start - clear the graceful Rx/Tx stop bit */
- graceful_start(dtsec, mode);
+ graceful_start(dtsec);
return 0;
}
-int dtsec_disable(struct fman_mac *dtsec, enum comm_mode mode)
+int dtsec_disable(struct fman_mac *dtsec)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
u32 tmp;
@@ -938,14 +899,10 @@ int dtsec_disable(struct fman_mac *dtsec, enum comm_mode mode)
return -EINVAL;
/* Graceful stop - Assert the graceful Rx/Tx stop bit */
- graceful_stop(dtsec, mode);
+ graceful_stop(dtsec);
tmp = ioread32be(&regs->maccfg1);
- if (mode & COMM_MODE_RX)
- tmp &= ~MACCFG1_RX_EN;
- if (mode & COMM_MODE_TX)
- tmp &= ~MACCFG1_TX_EN;
-
+ tmp &= ~(MACCFG1_RX_EN | MACCFG1_TX_EN);
iowrite32be(tmp, &regs->maccfg1);
return 0;
@@ -956,18 +913,12 @@ int dtsec_set_tx_pause_frames(struct fman_mac *dtsec,
u16 pause_time, u16 __maybe_unused thresh_time)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
- enum comm_mode mode = COMM_MODE_NONE;
u32 ptv = 0;
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
- if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
- mode |= COMM_MODE_RX;
- if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
- mode |= COMM_MODE_TX;
-
- graceful_stop(dtsec, mode);
+ graceful_stop(dtsec);
if (pause_time) {
/* FM_BAD_TX_TS_IN_B_2_B_ERRATA_DTSEC_A003 Errata workaround */
@@ -989,7 +940,7 @@ int dtsec_set_tx_pause_frames(struct fman_mac *dtsec,
iowrite32be(ioread32be(&regs->maccfg1) & ~MACCFG1_TX_FLOW,
&regs->maccfg1);
- graceful_start(dtsec, mode);
+ graceful_start(dtsec);
return 0;
}
@@ -997,18 +948,12 @@ int dtsec_set_tx_pause_frames(struct fman_mac *dtsec,
int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
- enum comm_mode mode = COMM_MODE_NONE;
u32 tmp;
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
- if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
- mode |= COMM_MODE_RX;
- if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
- mode |= COMM_MODE_TX;
-
- graceful_stop(dtsec, mode);
+ graceful_stop(dtsec);
tmp = ioread32be(&regs->maccfg1);
if (en)
@@ -1017,25 +962,17 @@ int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en)
tmp &= ~MACCFG1_RX_FLOW;
iowrite32be(tmp, &regs->maccfg1);
- graceful_start(dtsec, mode);
+ graceful_start(dtsec);
return 0;
}
int dtsec_modify_mac_address(struct fman_mac *dtsec, const enet_addr_t *enet_addr)
{
- struct dtsec_regs __iomem *regs = dtsec->regs;
- enum comm_mode mode = COMM_MODE_NONE;
-
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
- if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
- mode |= COMM_MODE_RX;
- if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
- mode |= COMM_MODE_TX;
-
- graceful_stop(dtsec, mode);
+ graceful_stop(dtsec);
/* Initialize MAC Station Address registers (1 & 2)
* Station address have to be swapped (big endian to little endian
@@ -1043,7 +980,7 @@ int dtsec_modify_mac_address(struct fman_mac *dtsec, const enet_addr_t *enet_add
dtsec->addr = ENET_ADDR_TO_UINT64(*enet_addr);
set_mac_address(dtsec->regs, (const u8 *)(*enet_addr));
- graceful_start(dtsec, mode);
+ graceful_start(dtsec);
return 0;
}
@@ -1261,18 +1198,12 @@ int dtsec_set_promiscuous(struct fman_mac *dtsec, bool new_val)
int dtsec_adjust_link(struct fman_mac *dtsec, u16 speed)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
- enum comm_mode mode = COMM_MODE_NONE;
u32 tmp;
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
- if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
- mode |= COMM_MODE_RX;
- if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
- mode |= COMM_MODE_TX;
-
- graceful_stop(dtsec, mode);
+ graceful_stop(dtsec);
tmp = ioread32be(&regs->maccfg2);
@@ -1293,7 +1224,7 @@ int dtsec_adjust_link(struct fman_mac *dtsec, u16 speed)
tmp &= ~DTSEC_ECNTRL_R100M;
iowrite32be(tmp, &regs->ecntrl);
- graceful_start(dtsec, mode);
+ graceful_start(dtsec);
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index 68512c3bd6e5..f072cdc560ba 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#ifndef __DTSEC_H
@@ -43,8 +16,8 @@ int dtsec_adjust_link(struct fman_mac *dtsec,
int dtsec_restart_autoneg(struct fman_mac *dtsec);
int dtsec_cfg_max_frame_len(struct fman_mac *dtsec, u16 new_val);
int dtsec_cfg_pad_and_crc(struct fman_mac *dtsec, bool new_val);
-int dtsec_enable(struct fman_mac *dtsec, enum comm_mode mode);
-int dtsec_disable(struct fman_mac *dtsec, enum comm_mode mode);
+int dtsec_enable(struct fman_mac *dtsec);
+int dtsec_disable(struct fman_mac *dtsec);
int dtsec_init(struct fman_mac *dtsec);
int dtsec_free(struct fman_mac *dtsec);
int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en);
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c
index e1bdfed16134..e73f6ef3c6ee 100644
--- a/drivers/net/ethernet/freescale/fman/fman_keygen.c
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
* Copyright 2017 NXP
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NXP nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY NXP ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL NXP BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.h b/drivers/net/ethernet/freescale/fman/fman_keygen.h
index c4640de3f4cb..2cb0df453074 100644
--- a/drivers/net/ethernet/freescale/fman/fman_keygen.h
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
* Copyright 2017 NXP
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NXP nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY NXP ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL NXP BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __KEYGEN_H
diff --git a/drivers/net/ethernet/freescale/fman/fman_mac.h b/drivers/net/ethernet/freescale/fman/fman_mac.h
index 19f327efdaff..418d1de85702 100644
--- a/drivers/net/ethernet/freescale/fman/fman_mac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_mac.h
@@ -75,16 +75,6 @@ typedef u8 enet_addr_t[ETH_ALEN];
#define ETH_HASH_ENTRY_OBJ(ptr) \
hlist_entry_safe(ptr, struct eth_hash_entry, node)
-/* Enumeration (bit flags) of communication modes (Transmit,
- * receive or both).
- */
-enum comm_mode {
- COMM_MODE_NONE = 0, /* No transmit/receive communication */
- COMM_MODE_RX = 1, /* Only receive communication */
- COMM_MODE_TX = 2, /* Only transmit communication */
- COMM_MODE_RX_AND_TX = 3 /* Both transmit and receive communication */
-};
-
/* FM MAC Exceptions */
enum fman_mac_exceptions {
FM_MAC_EX_10G_MDIO_SCAN_EVENT = 0
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index 2216b7f51d26..c34da49aed31 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -712,7 +685,7 @@ static bool is_init_done(struct memac_cfg *memac_drv_params)
return false;
}
-int memac_enable(struct fman_mac *memac, enum comm_mode mode)
+int memac_enable(struct fman_mac *memac)
{
struct memac_regs __iomem *regs = memac->regs;
u32 tmp;
@@ -721,17 +694,13 @@ int memac_enable(struct fman_mac *memac, enum comm_mode mode)
return -EINVAL;
tmp = ioread32be(&regs->command_config);
- if (mode & COMM_MODE_RX)
- tmp |= CMD_CFG_RX_EN;
- if (mode & COMM_MODE_TX)
- tmp |= CMD_CFG_TX_EN;
-
+ tmp |= CMD_CFG_RX_EN | CMD_CFG_TX_EN;
iowrite32be(tmp, &regs->command_config);
return 0;
}
-int memac_disable(struct fman_mac *memac, enum comm_mode mode)
+int memac_disable(struct fman_mac *memac)
{
struct memac_regs __iomem *regs = memac->regs;
u32 tmp;
@@ -740,11 +709,7 @@ int memac_disable(struct fman_mac *memac, enum comm_mode mode)
return -EINVAL;
tmp = ioread32be(&regs->command_config);
- if (mode & COMM_MODE_RX)
- tmp &= ~CMD_CFG_RX_EN;
- if (mode & COMM_MODE_TX)
- tmp &= ~CMD_CFG_TX_EN;
-
+ tmp &= ~(CMD_CFG_RX_EN | CMD_CFG_TX_EN);
iowrite32be(tmp, &regs->command_config);
return 0;
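
With enum comm_mode removed, memac_enable()/memac_disable() (and the matching tgec helpers further down) no longer select Rx or Tx individually; both directions are switched as one unit. A minimal sketch of that read-modify-write pattern follows — the helper name and the bit values are assumed for illustration, the real definitions live in the respective driver files:

/* Illustrative only: the pattern memac_enable()/memac_disable() and the
 * tgec equivalents now follow, with Rx and Tx always toggled together.
 */
#include <linux/io.h>
#include <linux/types.h>

#define CMD_CFG_TX_EN	0x00000001	/* assumed: command_config Tx enable bit */
#define CMD_CFG_RX_EN	0x00000002	/* assumed: command_config Rx enable bit */

static void mac_toggle_rx_tx(u32 __iomem *command_config, bool enable)
{
	u32 tmp = ioread32be(command_config);	/* MAC registers are big-endian */

	if (enable)
		tmp |= CMD_CFG_RX_EN | CMD_CFG_TX_EN;
	else
		tmp &= ~(CMD_CFG_RX_EN | CMD_CFG_TX_EN);

	iowrite32be(tmp, command_config);
}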
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index 3820f7a22983..535ecd2b2ab4 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#ifndef __MEMAC_H
@@ -46,8 +19,8 @@ int memac_cfg_max_frame_len(struct fman_mac *memac, u16 new_val);
int memac_cfg_reset_on_init(struct fman_mac *memac, bool enable);
int memac_cfg_fixed_link(struct fman_mac *memac,
struct fixed_phy_status *fixed_link);
-int memac_enable(struct fman_mac *memac, enum comm_mode mode);
-int memac_disable(struct fman_mac *memac, enum comm_mode mode);
+int memac_enable(struct fman_mac *memac);
+int memac_disable(struct fman_mac *memac);
int memac_init(struct fman_mac *memac);
int memac_free(struct fman_mac *memac);
int memac_accept_rx_pause_frames(struct fman_mac *memac, bool en);
diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.c b/drivers/net/ethernet/freescale/fman/fman_muram.c
index 7ad317e622bc..f557d68e5b76 100644
--- a/drivers/net/ethernet/freescale/fman/fman_muram.c
+++ b/drivers/net/ethernet/freescale/fman/fman_muram.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#include "fman_muram.h"
diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.h b/drivers/net/ethernet/freescale/fman/fman_muram.h
index 453bf849eee1..3643af61bae2 100644
--- a/drivers/net/ethernet/freescale/fman/fman_muram.h
+++ b/drivers/net/ethernet/freescale/fman/fman_muram.h
@@ -1,34 +1,8 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
+
#ifndef __FM_MURAM_EXT
#define __FM_MURAM_EXT
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index 4c9d05c45c03..ab90fe2bee5e 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
* Copyright 2008 - 2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.h b/drivers/net/ethernet/freescale/fman/fman_port.h
index 82f12661a46d..4917fe8f0617 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.h
+++ b/drivers/net/ethernet/freescale/fman/fman_port.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
* Copyright 2008 - 2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FMAN_PORT_H
diff --git a/drivers/net/ethernet/freescale/fman/fman_sp.c b/drivers/net/ethernet/freescale/fman/fman_sp.c
index 248f5bcca468..0fac60aa5283 100644
--- a/drivers/net/ethernet/freescale/fman/fman_sp.c
+++ b/drivers/net/ethernet/freescale/fman/fman_sp.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
* Copyright 2008 - 2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "fman_sp.h"
diff --git a/drivers/net/ethernet/freescale/fman/fman_sp.h b/drivers/net/ethernet/freescale/fman/fman_sp.h
index 820b7f63088f..a62dd21c81f1 100644
--- a/drivers/net/ethernet/freescale/fman/fman_sp.h
+++ b/drivers/net/ethernet/freescale/fman/fman_sp.h
@@ -1,32 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
* Copyright 2008 - 2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FM_SP_H
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 311c1906e044..2b38d22c863d 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -419,7 +392,7 @@ static bool is_init_done(struct tgec_cfg *cfg)
return false;
}
-int tgec_enable(struct fman_mac *tgec, enum comm_mode mode)
+int tgec_enable(struct fman_mac *tgec)
{
struct tgec_regs __iomem *regs = tgec->regs;
u32 tmp;
@@ -428,16 +401,13 @@ int tgec_enable(struct fman_mac *tgec, enum comm_mode mode)
return -EINVAL;
tmp = ioread32be(&regs->command_config);
- if (mode & COMM_MODE_RX)
- tmp |= CMD_CFG_RX_EN;
- if (mode & COMM_MODE_TX)
- tmp |= CMD_CFG_TX_EN;
+ tmp |= CMD_CFG_RX_EN | CMD_CFG_TX_EN;
iowrite32be(tmp, &regs->command_config);
return 0;
}
-int tgec_disable(struct fman_mac *tgec, enum comm_mode mode)
+int tgec_disable(struct fman_mac *tgec)
{
struct tgec_regs __iomem *regs = tgec->regs;
u32 tmp;
@@ -446,10 +416,7 @@ int tgec_disable(struct fman_mac *tgec, enum comm_mode mode)
return -EINVAL;
tmp = ioread32be(&regs->command_config);
- if (mode & COMM_MODE_RX)
- tmp &= ~CMD_CFG_RX_EN;
- if (mode & COMM_MODE_TX)
- tmp &= ~CMD_CFG_TX_EN;
+ tmp &= ~(CMD_CFG_RX_EN | CMD_CFG_TX_EN);
iowrite32be(tmp, &regs->command_config);
return 0;
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index b28b20b26148..5b256758cbec 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
/*
- * Copyright 2008-2015 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#ifndef __TGEC_H
@@ -39,8 +12,8 @@ struct fman_mac *tgec_config(struct fman_mac_params *params);
int tgec_set_promiscuous(struct fman_mac *tgec, bool new_val);
int tgec_modify_mac_address(struct fman_mac *tgec, const enet_addr_t *enet_addr);
int tgec_cfg_max_frame_len(struct fman_mac *tgec, u16 new_val);
-int tgec_enable(struct fman_mac *tgec, enum comm_mode mode);
-int tgec_disable(struct fman_mac *tgec, enum comm_mode mode);
+int tgec_enable(struct fman_mac *tgec);
+int tgec_disable(struct fman_mac *tgec);
int tgec_init(struct fman_mac *tgec);
int tgec_free(struct fman_mac *tgec);
int tgec_accept_rx_pause_frames(struct fman_mac *tgec, bool en);
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 39ae965cd4f6..c376b9bf657d 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -1,32 +1,6 @@
-/* Copyright 2008-2015 Freescale Semiconductor, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
+/*
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -54,20 +28,14 @@ MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("FSL FMan MAC API based driver");
struct mac_priv_s {
- struct device *dev;
void __iomem *vaddr;
u8 cell_index;
struct fman *fman;
- struct device_node *internal_phy_node;
/* List of multicast addresses */
struct list_head mc_addr_list;
struct platform_device *eth_dev;
- struct fixed_phy_status *fixed_link;
u16 speed;
u16 max_speed;
-
- int (*enable)(struct fman_mac *mac_dev, enum comm_mode mode);
- int (*disable)(struct fman_mac *mac_dev, enum comm_mode mode);
};
struct mac_address {
@@ -77,30 +45,26 @@ struct mac_address {
static void mac_exception(void *handle, enum fman_mac_exceptions ex)
{
- struct mac_device *mac_dev;
- struct mac_priv_s *priv;
-
- mac_dev = handle;
- priv = mac_dev->priv;
+ struct mac_device *mac_dev = handle;
if (ex == FM_MAC_EX_10G_RX_FIFO_OVFL) {
/* don't flag RX FIFO after the first */
mac_dev->set_exception(mac_dev->fman_mac,
FM_MAC_EX_10G_RX_FIFO_OVFL, false);
- dev_err(priv->dev, "10G MAC got RX FIFO Error = %x\n", ex);
+ dev_err(mac_dev->dev, "10G MAC got RX FIFO Error = %x\n", ex);
}
- dev_dbg(priv->dev, "%s:%s() -> %d\n", KBUILD_BASENAME ".c",
+ dev_dbg(mac_dev->dev, "%s:%s() -> %d\n", KBUILD_BASENAME ".c",
__func__, ex);
}
-static int set_fman_mac_params(struct mac_device *mac_dev,
- struct fman_mac_params *params)
+int set_fman_mac_params(struct mac_device *mac_dev,
+ struct fman_mac_params *params)
{
struct mac_priv_s *priv = mac_dev->priv;
params->base_addr = (typeof(params->base_addr))
- devm_ioremap(priv->dev, mac_dev->res->start,
+ devm_ioremap(mac_dev->dev, mac_dev->res->start,
resource_size(mac_dev->res));
if (!params->base_addr)
return -ENOMEM;
@@ -114,183 +78,11 @@ static int set_fman_mac_params(struct mac_device *mac_dev,
params->exception_cb = mac_exception;
params->event_cb = mac_exception;
params->dev_id = mac_dev;
- params->internal_phy_node = priv->internal_phy_node;
return 0;
}
-static int tgec_initialization(struct mac_device *mac_dev)
-{
- int err;
- struct mac_priv_s *priv;
- struct fman_mac_params params;
- u32 version;
-
- priv = mac_dev->priv;
-
- err = set_fman_mac_params(mac_dev, &params);
- if (err)
- goto _return;
-
- mac_dev->fman_mac = tgec_config(&params);
- if (!mac_dev->fman_mac) {
- err = -EINVAL;
- goto _return;
- }
-
- err = tgec_cfg_max_frame_len(mac_dev->fman_mac, fman_get_max_frm());
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = tgec_init(mac_dev->fman_mac);
- if (err < 0)
- goto _return_fm_mac_free;
-
- /* For 10G MAC, disable Tx ECC exception */
- err = mac_dev->set_exception(mac_dev->fman_mac,
- FM_MAC_EX_10G_TX_ECC_ER, false);
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = tgec_get_version(mac_dev->fman_mac, &version);
- if (err < 0)
- goto _return_fm_mac_free;
-
- dev_info(priv->dev, "FMan XGEC version: 0x%08x\n", version);
-
- goto _return;
-
-_return_fm_mac_free:
- tgec_free(mac_dev->fman_mac);
-
-_return:
- return err;
-}
-
-static int dtsec_initialization(struct mac_device *mac_dev)
-{
- int err;
- struct mac_priv_s *priv;
- struct fman_mac_params params;
- u32 version;
-
- priv = mac_dev->priv;
-
- err = set_fman_mac_params(mac_dev, &params);
- if (err)
- goto _return;
-
- mac_dev->fman_mac = dtsec_config(&params);
- if (!mac_dev->fman_mac) {
- err = -EINVAL;
- goto _return;
- }
-
- err = dtsec_cfg_max_frame_len(mac_dev->fman_mac, fman_get_max_frm());
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = dtsec_cfg_pad_and_crc(mac_dev->fman_mac, true);
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = dtsec_init(mac_dev->fman_mac);
- if (err < 0)
- goto _return_fm_mac_free;
-
- /* For 1G MAC, disable by default the MIB counters overflow interrupt */
- err = mac_dev->set_exception(mac_dev->fman_mac,
- FM_MAC_EX_1G_RX_MIB_CNT_OVFL, false);
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = dtsec_get_version(mac_dev->fman_mac, &version);
- if (err < 0)
- goto _return_fm_mac_free;
-
- dev_info(priv->dev, "FMan dTSEC version: 0x%08x\n", version);
-
- goto _return;
-
-_return_fm_mac_free:
- dtsec_free(mac_dev->fman_mac);
-
-_return:
- return err;
-}
-
-static int memac_initialization(struct mac_device *mac_dev)
-{
- int err;
- struct mac_priv_s *priv;
- struct fman_mac_params params;
-
- priv = mac_dev->priv;
-
- err = set_fman_mac_params(mac_dev, &params);
- if (err)
- goto _return;
-
- if (priv->max_speed == SPEED_10000)
- params.phy_if = PHY_INTERFACE_MODE_XGMII;
-
- mac_dev->fman_mac = memac_config(&params);
- if (!mac_dev->fman_mac) {
- err = -EINVAL;
- goto _return;
- }
-
- err = memac_cfg_max_frame_len(mac_dev->fman_mac, fman_get_max_frm());
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = memac_cfg_reset_on_init(mac_dev->fman_mac, true);
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = memac_cfg_fixed_link(mac_dev->fman_mac, priv->fixed_link);
- if (err < 0)
- goto _return_fm_mac_free;
-
- err = memac_init(mac_dev->fman_mac);
- if (err < 0)
- goto _return_fm_mac_free;
-
- dev_info(priv->dev, "FMan MEMAC\n");
-
- goto _return;
-
-_return_fm_mac_free:
- memac_free(mac_dev->fman_mac);
-
-_return:
- return err;
-}
-
-static int start(struct mac_device *mac_dev)
-{
- int err;
- struct phy_device *phy_dev = mac_dev->phy_dev;
- struct mac_priv_s *priv = mac_dev->priv;
-
- err = priv->enable(mac_dev->fman_mac, COMM_MODE_RX_AND_TX);
- if (!err && phy_dev)
- phy_start(phy_dev);
-
- return err;
-}
-
-static int stop(struct mac_device *mac_dev)
-{
- struct mac_priv_s *priv = mac_dev->priv;
-
- if (mac_dev->phy_dev)
- phy_stop(mac_dev->phy_dev);
-
- return priv->disable(mac_dev->fman_mac, COMM_MODE_RX_AND_TX);
-}
-
-static int set_multi(struct net_device *net_dev, struct mac_device *mac_dev)
+int fman_set_multi(struct net_device *net_dev, struct mac_device *mac_dev)
{
struct mac_priv_s *priv;
struct mac_address *old_addr, *tmp;
@@ -446,7 +238,7 @@ static void adjust_link_dtsec(struct mac_device *mac_dev)
fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
if (err < 0)
- dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n",
+ dev_err(mac_dev->dev, "fman_set_mac_active_pause() = %d\n",
err);
}
@@ -463,33 +255,17 @@ static void adjust_link_memac(struct mac_device *mac_dev)
fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
if (err < 0)
- dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n",
+ dev_err(mac_dev->dev, "fman_set_mac_active_pause() = %d\n",
err);
}
-static void setup_dtsec(struct mac_device *mac_dev)
+static int tgec_initialization(struct mac_device *mac_dev,
+ struct device_node *mac_node)
{
- mac_dev->init = dtsec_initialization;
- mac_dev->set_promisc = dtsec_set_promiscuous;
- mac_dev->change_addr = dtsec_modify_mac_address;
- mac_dev->add_hash_mac_addr = dtsec_add_hash_mac_address;
- mac_dev->remove_hash_mac_addr = dtsec_del_hash_mac_address;
- mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
- mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
- mac_dev->set_exception = dtsec_set_exception;
- mac_dev->set_allmulti = dtsec_set_allmulti;
- mac_dev->set_tstamp = dtsec_set_tstamp;
- mac_dev->set_multi = set_multi;
- mac_dev->start = start;
- mac_dev->stop = stop;
- mac_dev->adjust_link = adjust_link_dtsec;
- mac_dev->priv->enable = dtsec_enable;
- mac_dev->priv->disable = dtsec_disable;
-}
+ int err;
+ struct fman_mac_params params;
+ u32 version;
-static void setup_tgec(struct mac_device *mac_dev)
-{
- mac_dev->init = tgec_initialization;
mac_dev->set_promisc = tgec_set_promiscuous;
mac_dev->change_addr = tgec_modify_mac_address;
mac_dev->add_hash_mac_addr = tgec_add_hash_mac_address;
@@ -499,17 +275,122 @@ static void setup_tgec(struct mac_device *mac_dev)
mac_dev->set_exception = tgec_set_exception;
mac_dev->set_allmulti = tgec_set_allmulti;
mac_dev->set_tstamp = tgec_set_tstamp;
- mac_dev->set_multi = set_multi;
- mac_dev->start = start;
- mac_dev->stop = stop;
+ mac_dev->set_multi = fman_set_multi;
mac_dev->adjust_link = adjust_link_void;
- mac_dev->priv->enable = tgec_enable;
- mac_dev->priv->disable = tgec_disable;
+ mac_dev->enable = tgec_enable;
+ mac_dev->disable = tgec_disable;
+
+ err = set_fman_mac_params(mac_dev, &params);
+ if (err)
+ goto _return;
+
+ mac_dev->fman_mac = tgec_config(&params);
+ if (!mac_dev->fman_mac) {
+ err = -EINVAL;
+ goto _return;
+ }
+
+ err = tgec_cfg_max_frame_len(mac_dev->fman_mac, fman_get_max_frm());
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ err = tgec_init(mac_dev->fman_mac);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ /* For 10G MAC, disable Tx ECC exception */
+ err = mac_dev->set_exception(mac_dev->fman_mac,
+ FM_MAC_EX_10G_TX_ECC_ER, false);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ err = tgec_get_version(mac_dev->fman_mac, &version);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ dev_info(mac_dev->dev, "FMan XGEC version: 0x%08x\n", version);
+
+ goto _return;
+
+_return_fm_mac_free:
+ tgec_free(mac_dev->fman_mac);
+
+_return:
+ return err;
+}
+
+static int dtsec_initialization(struct mac_device *mac_dev,
+ struct device_node *mac_node)
+{
+ int err;
+ struct fman_mac_params params;
+ u32 version;
+
+ mac_dev->set_promisc = dtsec_set_promiscuous;
+ mac_dev->change_addr = dtsec_modify_mac_address;
+ mac_dev->add_hash_mac_addr = dtsec_add_hash_mac_address;
+ mac_dev->remove_hash_mac_addr = dtsec_del_hash_mac_address;
+ mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
+ mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
+ mac_dev->set_exception = dtsec_set_exception;
+ mac_dev->set_allmulti = dtsec_set_allmulti;
+ mac_dev->set_tstamp = dtsec_set_tstamp;
+ mac_dev->set_multi = fman_set_multi;
+ mac_dev->adjust_link = adjust_link_dtsec;
+ mac_dev->enable = dtsec_enable;
+ mac_dev->disable = dtsec_disable;
+
+ err = set_fman_mac_params(mac_dev, &params);
+ if (err)
+ goto _return;
+ params.internal_phy_node = of_parse_phandle(mac_node, "tbi-handle", 0);
+
+ mac_dev->fman_mac = dtsec_config(&params);
+ if (!mac_dev->fman_mac) {
+ err = -EINVAL;
+ goto _return;
+ }
+
+ err = dtsec_cfg_max_frame_len(mac_dev->fman_mac, fman_get_max_frm());
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ err = dtsec_cfg_pad_and_crc(mac_dev->fman_mac, true);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ err = dtsec_init(mac_dev->fman_mac);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ /* For 1G MAC, disable by default the MIB counters overflow interrupt */
+ err = mac_dev->set_exception(mac_dev->fman_mac,
+ FM_MAC_EX_1G_RX_MIB_CNT_OVFL, false);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ err = dtsec_get_version(mac_dev->fman_mac, &version);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ dev_info(mac_dev->dev, "FMan dTSEC version: 0x%08x\n", version);
+
+ goto _return;
+
+_return_fm_mac_free:
+ dtsec_free(mac_dev->fman_mac);
+
+_return:
+ return err;
}
-static void setup_memac(struct mac_device *mac_dev)
+static int memac_initialization(struct mac_device *mac_dev,
+ struct device_node *mac_node)
{
- mac_dev->init = memac_initialization;
+ int err;
+ struct fman_mac_params params;
+ struct fixed_phy_status *fixed_link;
+
mac_dev->set_promisc = memac_set_promiscuous;
mac_dev->change_addr = memac_modify_mac_address;
mac_dev->add_hash_mac_addr = memac_add_hash_mac_address;
@@ -519,12 +400,81 @@ static void setup_memac(struct mac_device *mac_dev)
mac_dev->set_exception = memac_set_exception;
mac_dev->set_allmulti = memac_set_allmulti;
mac_dev->set_tstamp = memac_set_tstamp;
- mac_dev->set_multi = set_multi;
- mac_dev->start = start;
- mac_dev->stop = stop;
+ mac_dev->set_multi = fman_set_multi;
mac_dev->adjust_link = adjust_link_memac;
- mac_dev->priv->enable = memac_enable;
- mac_dev->priv->disable = memac_disable;
+ mac_dev->enable = memac_enable;
+ mac_dev->disable = memac_disable;
+
+ err = set_fman_mac_params(mac_dev, &params);
+ if (err)
+ goto _return;
+ params.internal_phy_node = of_parse_phandle(mac_node, "pcsphy-handle", 0);
+
+ if (params.max_speed == SPEED_10000)
+ params.phy_if = PHY_INTERFACE_MODE_XGMII;
+
+ mac_dev->fman_mac = memac_config(&params);
+ if (!mac_dev->fman_mac) {
+ err = -EINVAL;
+ goto _return;
+ }
+
+ err = memac_cfg_max_frame_len(mac_dev->fman_mac, fman_get_max_frm());
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ err = memac_cfg_reset_on_init(mac_dev->fman_mac, true);
+ if (err < 0)
+ goto _return_fm_mac_free;
+
+ if (!mac_dev->phy_node && of_phy_is_fixed_link(mac_node)) {
+ struct phy_device *phy;
+
+ err = of_phy_register_fixed_link(mac_node);
+ if (err)
+ goto _return_fm_mac_free;
+
+ fixed_link = kzalloc(sizeof(*fixed_link), GFP_KERNEL);
+ if (!fixed_link) {
+ err = -ENOMEM;
+ goto _return_fm_mac_free;
+ }
+
+ mac_dev->phy_node = of_node_get(mac_node);
+ phy = of_phy_find_device(mac_dev->phy_node);
+ if (!phy) {
+ err = -EINVAL;
+ of_node_put(mac_dev->phy_node);
+ goto _return_fixed_link_free;
+ }
+
+ fixed_link->link = phy->link;
+ fixed_link->speed = phy->speed;
+ fixed_link->duplex = phy->duplex;
+ fixed_link->pause = phy->pause;
+ fixed_link->asym_pause = phy->asym_pause;
+
+ put_device(&phy->mdio.dev);
+
+ err = memac_cfg_fixed_link(mac_dev->fman_mac, fixed_link);
+ if (err < 0)
+ goto _return_fixed_link_free;
+ }
+
+ err = memac_init(mac_dev->fman_mac);
+ if (err < 0)
+ goto _return_fixed_link_free;
+
+ dev_info(mac_dev->dev, "FMan MEMAC\n");
+
+ goto _return;
+
+_return_fixed_link_free:
+ kfree(fixed_link);
+_return_fm_mac_free:
+ memac_free(mac_dev->fman_mac);
+_return:
+ return err;
}
#define DTSEC_SUPPORTED \
@@ -577,7 +527,7 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
goto no_mem;
}
- pdev->dev.parent = priv->dev;
+ pdev->dev.parent = mac_dev->dev;
ret = platform_device_add_data(pdev, &data, sizeof(data));
if (ret)
@@ -601,9 +551,9 @@ no_mem:
}
static const struct of_device_id mac_match[] = {
- { .compatible = "fsl,fman-dtsec" },
- { .compatible = "fsl,fman-xgec" },
- { .compatible = "fsl,fman-memac" },
+ { .compatible = "fsl,fman-dtsec", .data = dtsec_initialization },
+ { .compatible = "fsl,fman-xgec", .data = tgec_initialization },
+ { .compatible = "fsl,fman-memac", .data = memac_initialization },
{}
};
MODULE_DEVICE_TABLE(of, mac_match);
@@ -611,6 +561,7 @@ MODULE_DEVICE_TABLE(of, mac_match);
static int mac_probe(struct platform_device *_of_dev)
{
int err, i, nph;
+ int (*init)(struct mac_device *mac_dev, struct device_node *mac_node);
struct device *dev;
struct device_node *mac_node, *dev_node;
struct mac_device *mac_dev;
@@ -623,6 +574,7 @@ static int mac_probe(struct platform_device *_of_dev)
dev = &_of_dev->dev;
mac_node = dev->of_node;
+ init = of_device_get_match_data(dev);
mac_dev = devm_kzalloc(dev, sizeof(*mac_dev), GFP_KERNEL);
if (!mac_dev) {
@@ -637,24 +589,7 @@ static int mac_probe(struct platform_device *_of_dev)
/* Save private information */
mac_dev->priv = priv;
- priv->dev = dev;
-
- if (of_device_is_compatible(mac_node, "fsl,fman-dtsec")) {
- setup_dtsec(mac_dev);
- priv->internal_phy_node = of_parse_phandle(mac_node,
- "tbi-handle", 0);
- } else if (of_device_is_compatible(mac_node, "fsl,fman-xgec")) {
- setup_tgec(mac_dev);
- } else if (of_device_is_compatible(mac_node, "fsl,fman-memac")) {
- setup_memac(mac_dev);
- priv->internal_phy_node = of_parse_phandle(mac_node,
- "pcsphy-handle", 0);
- } else {
- dev_err(dev, "MAC node (%pOF) contains unsupported MAC\n",
- mac_node);
- err = -EINVAL;
- goto _return;
- }
+ mac_dev->dev = dev;
INIT_LIST_HEAD(&priv->mc_addr_list);
@@ -664,7 +599,7 @@ static int mac_probe(struct platform_device *_of_dev)
dev_err(dev, "of_get_parent(%pOF) failed\n",
mac_node);
err = -EINVAL;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
of_dev = of_find_device_by_node(dev_node);
@@ -698,7 +633,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (err < 0) {
dev_err(dev, "of_address_to_resource(%pOF) = %d\n",
mac_node, err);
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
mac_dev->res = __devm_request_region(dev,
@@ -708,7 +643,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (!mac_dev->res) {
dev_err(dev, "__devm_request_mem_region(mac) failed\n");
err = -EBUSY;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
priv->vaddr = devm_ioremap(dev, mac_dev->res->start,
@@ -716,12 +651,12 @@ static int mac_probe(struct platform_device *_of_dev)
if (!priv->vaddr) {
dev_err(dev, "devm_ioremap() failed\n");
err = -EIO;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
if (!of_device_is_available(mac_node)) {
err = -ENODEV;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
/* Get the cell-index */
@@ -729,7 +664,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (err) {
dev_err(dev, "failed to read cell-index for %pOF\n", mac_node);
err = -EINVAL;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
priv->cell_index = (u8)val;
@@ -744,14 +679,14 @@ static int mac_probe(struct platform_device *_of_dev)
dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n",
mac_node);
err = nph;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
if (nph != ARRAY_SIZE(mac_dev->port)) {
dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n",
mac_node);
err = -EINVAL;
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
@@ -810,42 +745,12 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the rest of the PHY information */
mac_dev->phy_node = of_parse_phandle(mac_node, "phy-handle", 0);
- if (!mac_dev->phy_node && of_phy_is_fixed_link(mac_node)) {
- struct phy_device *phy;
-
- err = of_phy_register_fixed_link(mac_node);
- if (err)
- goto _return_of_get_parent;
-
- priv->fixed_link = kzalloc(sizeof(*priv->fixed_link),
- GFP_KERNEL);
- if (!priv->fixed_link) {
- err = -ENOMEM;
- goto _return_of_get_parent;
- }
-
- mac_dev->phy_node = of_node_get(mac_node);
- phy = of_phy_find_device(mac_dev->phy_node);
- if (!phy) {
- err = -EINVAL;
- of_node_put(mac_dev->phy_node);
- goto _return_of_get_parent;
- }
- priv->fixed_link->link = phy->link;
- priv->fixed_link->speed = phy->speed;
- priv->fixed_link->duplex = phy->duplex;
- priv->fixed_link->pause = phy->pause;
- priv->fixed_link->asym_pause = phy->asym_pause;
-
- put_device(&phy->mdio.dev);
- }
-
- err = mac_dev->init(mac_dev);
+ err = init(mac_dev, mac_node);
if (err < 0) {
dev_err(dev, "mac_dev->init() = %d\n", err);
of_node_put(mac_dev->phy_node);
- goto _return_of_get_parent;
+ goto _return_of_node_put;
}
/* pause frame autonegotiation enabled */
@@ -876,8 +781,6 @@ static int mac_probe(struct platform_device *_of_dev)
_return_of_node_put:
of_node_put(dev_node);
-_return_of_get_parent:
- kfree(priv->fixed_link);
_return:
return err;
}
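
The mac.c rework above replaces the of_device_is_compatible() if/else chain with per-compatible init callbacks stored in of_device_id.data and fetched via of_device_get_match_data(). A hedged, self-contained sketch of that dispatch pattern — everything except the OF/platform helpers is hypothetical:

/* Illustrative sketch of match-data dispatch: each compatible carries its
 * init callback, and probe() retrieves it instead of re-checking strings.
 */
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

typedef int (*example_init_fn)(struct device_node *mac_node);

static int example_variant_a_init(struct device_node *mac_node)
{
	return 0;	/* variant-specific setup would go here */
}

static const struct of_device_id example_match[] = {
	{ .compatible = "vendor,mac-a", .data = example_variant_a_init },
	{ }
};

static int example_probe(struct platform_device *pdev)
{
	example_init_fn init = of_device_get_match_data(&pdev->dev);

	if (!init)
		return -EINVAL;

	return init(pdev->dev.of_node);
}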
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index daa285a9b8b2..da410a7d00c9 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -1,32 +1,6 @@
-/* Copyright 2008-2015 Freescale Semiconductor, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
+/*
+ * Copyright 2008 - 2015 Freescale Semiconductor Inc.
*/
#ifndef __MAC_H
@@ -45,6 +19,7 @@ struct fman_mac;
struct mac_priv_s;
struct mac_device {
+ struct device *dev;
struct resource *res;
u8 addr[ETH_ALEN];
struct fman_port *port[2];
@@ -61,9 +36,8 @@ struct mac_device {
bool promisc;
bool allmulti;
- int (*init)(struct mac_device *mac_dev);
- int (*start)(struct mac_device *mac_dev);
- int (*stop)(struct mac_device *mac_dev);
+ int (*enable)(struct fman_mac *mac_dev);
+ int (*disable)(struct fman_mac *mac_dev);
void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, const enet_addr_t *enet_addr);
@@ -97,5 +71,8 @@ int fman_set_mac_active_pause(struct mac_device *mac_dev, bool rx, bool tx);
void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause,
bool *tx_pause);
+int set_fman_mac_params(struct mac_device *mac_dev,
+ struct fman_mac_params *params);
+int fman_set_multi(struct net_device *net_dev, struct mac_device *mac_dev);
#endif /* __MAC_H */
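
mac.h now exposes enable()/disable() in place of the old start()/stop() trampolines, so the consuming Ethernet driver is expected to pair them with phylib itself. A hedged sketch of what such a caller could look like (the dpaa_eth side of this change is not shown here; the function names below are illustrative):

#include <linux/phy.h>
#include "mac.h"

/* Illustrative only: roughly the work the removed start()/stop() helpers
 * did, now left to the driver that owns the net_device.
 */
static int example_mac_start(struct mac_device *mac_dev)
{
	int err = mac_dev->enable(mac_dev->fman_mac);

	if (!err && mac_dev->phy_dev)
		phy_start(mac_dev->phy_dev);

	return err;
}

static int example_mac_stop(struct mac_device *mac_dev)
{
	if (mac_dev->phy_dev)
		phy_stop(mac_dev->phy_dev);

	return mac_dev->disable(mac_dev->fman_mac);
}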
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index ec90da1de030..d7d39a58cd80 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -355,7 +355,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
if (ret)
return ret;
- fwnode = pdev->dev.fwnode;
+ fwnode = dev_fwnode(&pdev->dev);
if (is_of_node(fwnode))
ret = of_mdiobus_register(bus, to_of_node(fwnode));
else if (is_acpi_node(fwnode))
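
dev_fwnode() is the accessor preferred over reaching into dev->fwnode directly, and it behaves the same whether the device was described by DT or ACPI. A small, hedged sketch of the firmware-agnostic pattern (the property name is made up):

#include <linux/property.h>
#include <linux/platform_device.h>

/* Illustrative: read a hypothetical "example-prop" property through the
 * fwnode layer so one code path serves both OF and ACPI descriptions.
 */
static int example_read_prop(struct platform_device *pdev, u32 *val)
{
	struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev);

	if (!fwnode)
		return -ENODEV;

	return fwnode_property_read_u32(fwnode, "example-prop", val);
}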
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 841fa149c407..001500afc4a6 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -864,6 +864,7 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
struct ice_q_stats stats, u64 *pkts, u64 *bytes);
int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
+int ice_down_up(struct ice_vsi *vsi);
int ice_vsi_cfg(struct ice_vsi *vsi);
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 136d7911adb4..6f092e06054e 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -417,7 +417,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
/* Strip the Ethernet CRC bytes before the packet is posted to host
* memory.
*/
- rlan_ctx.crcstrip = 1;
+ rlan_ctx.crcstrip = !(ring->flags & ICE_RX_FLAGS_CRC_STRIP_DIS);
/* L2TSEL flag defines the reported L2 Tags in the receive descriptor
* and it needs to remain 1 for non-DVM capable configurations to not
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 27d0cbbd29da..c8d95b299fee 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2776,6 +2776,26 @@ ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
}
/**
+ * ice_is_100m_speed_supported
+ * @hw: pointer to the HW struct
+ *
+ * returns true if 100M speeds are supported by the device,
+ * false otherwise.
+ */
+bool ice_is_100m_speed_supported(struct ice_hw *hw)
+{
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E822C_SGMII:
+ case ICE_DEV_ID_E822L_SGMII:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823C_SGMII:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
* ice_get_link_speed_based_on_phy_type - returns link speed
* @phy_type_low: lower part of phy_type
* @phy_type_high: higher part of phy_type
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 61b7c60db689..d08f7f9ea8b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -204,6 +204,7 @@ ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value,
int
ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx,
bool *value, struct ice_sq_cd *cd);
+bool ice_is_100m_speed_supported(struct ice_hw *hw);
int
ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
struct ice_sq_cd *cd);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index a6fff8ebaf9d..b7be84bbe72d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -136,6 +136,11 @@ static const struct ice_stats ice_gstrings_pf_stats[] = {
ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
ICE_PF_STAT("fdir_sb_match.nic", stats.fd_sb_match),
ICE_PF_STAT("fdir_sb_status.nic", stats.fd_sb_status),
+ ICE_PF_STAT("tx_hwtstamp_skipped", ptp.tx_hwtstamp_skipped),
+ ICE_PF_STAT("tx_hwtstamp_timeouts", ptp.tx_hwtstamp_timeouts),
+ ICE_PF_STAT("tx_hwtstamp_flushed", ptp.tx_hwtstamp_flushed),
+ ICE_PF_STAT("tx_hwtstamp_discarded", ptp.tx_hwtstamp_discarded),
+ ICE_PF_STAT("late_cached_phc_updates", ptp.late_cached_phc_updates),
};
static const u32 ice_regs_dump_list[] = {
@@ -1284,10 +1289,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
}
if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
/* down and up VSI so that changes of Rx cfg are reflected. */
- if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
- ice_down(vsi);
- ice_up(vsi);
- }
+ ice_down_up(vsi);
}
/* don't allow modification of this flag when a single VF is in
* promiscuous mode because it's not supported
@@ -1468,20 +1470,22 @@ ice_get_ethtool_stats(struct net_device *netdev,
/**
* ice_mask_min_supported_speeds
+ * @hw: pointer to the HW structure
* @phy_types_high: PHY type high
* @phy_types_low: PHY type low to apply minimum supported speeds mask
*
* Apply minimum supported speeds mask to PHY type low. These are the speeds
* for ethtool supported link mode.
*/
-static
-void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low)
+static void
+ice_mask_min_supported_speeds(struct ice_hw *hw,
+ u64 phy_types_high, u64 *phy_types_low)
{
/* if QSFP connection with 100G speed, minimum supported speed is 25G */
if (*phy_types_low & ICE_PHY_TYPE_LOW_MASK_100G ||
phy_types_high & ICE_PHY_TYPE_HIGH_MASK_100G)
*phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_25G;
- else
+ else if (!ice_is_100m_speed_supported(hw))
*phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G;
}
@@ -1531,7 +1535,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
- ice_mask_min_supported_speeds(phy_types_high, &phy_types_low);
+ ice_mask_min_supported_speeds(&pf->hw, phy_types_high,
+ &phy_types_low);
/* determine advertised modes based on link override only
* if it's supported and if the FW doesn't abstract the
* driver from having to account for link overrides
@@ -2826,6 +2831,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
tx_rings[i].count = new_tx_cnt;
tx_rings[i].desc = NULL;
tx_rings[i].tx_buf = NULL;
+ tx_rings[i].tx_tstamps = &pf->ptp.port.tx;
err = ice_setup_tx_ring(&tx_rings[i]);
if (err) {
while (i--)
@@ -2884,6 +2890,7 @@ process_rx:
/* clone ring and setup updated count */
rx_rings[i] = *vsi->rx_rings[i];
rx_rings[i].count = new_rx_cnt;
+ rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
rx_rings[i].desc = NULL;
rx_rings[i].rx_buf = NULL;
/* this is to allow wr32 to have something to write to
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index c9f7393b783d..ee5b36941ba3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -61,13 +61,13 @@ static void ice_lag_set_backup(struct ice_lag *lag)
*/
static void ice_display_lag_info(struct ice_lag *lag)
{
- const char *name, *peer, *upper, *role, *bonded, *master;
+ const char *name, *peer, *upper, *role, *bonded, *primary;
struct device *dev = &lag->pf->pdev->dev;
name = lag->netdev ? netdev_name(lag->netdev) : "unset";
peer = lag->peer_netdev ? netdev_name(lag->peer_netdev) : "unset";
upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
- master = lag->master ? "TRUE" : "FALSE";
+ primary = lag->primary ? "TRUE" : "FALSE";
bonded = lag->bonded ? "BONDED" : "UNBONDED";
switch (lag->role) {
@@ -87,8 +87,8 @@ static void ice_display_lag_info(struct ice_lag *lag)
role = "ERROR";
}
- dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, master:%s\n", name,
- bonded, peer, upper, role, master);
+ dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, primary:%s\n", name,
+ bonded, peer, upper, role, primary);
}
/**
@@ -119,7 +119,7 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
}
if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
- netdev_dbg(lag->netdev, "Bonding event recv, but slave info not for us\n");
+ netdev_dbg(lag->netdev, "Bonding event recv, but secondary info not for us\n");
goto lag_out;
}
@@ -164,8 +164,8 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info)
lag->bonded = true;
lag->role = ICE_LAG_UNSET;
- /* if this is the first element in an LAG mark as master */
- lag->master = !!(peers == 1);
+ /* if this is the first element in an LAG mark as primary */
+ lag->primary = !!(peers == 1);
}
/**
@@ -264,7 +264,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
netdev_dbg(netdev, "bonding %s\n", info->linking ? "LINK" : "UNLINK");
if (!netif_is_lag_master(info->upper_dev)) {
- netdev_dbg(netdev, "changeupper rcvd, but not master. bail\n");
+ netdev_dbg(netdev, "changeupper rcvd, but not primary. bail\n");
return;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
index c2e3688dd8fd..51b5cf467ce2 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.h
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -24,7 +24,7 @@ struct ice_lag {
struct net_device *upper_netdev; /* upper bonding netdev */
struct notifier_block notif_block;
u8 bonded:1; /* currently bonded */
- u8 master:1; /* this is a master */
+ u8 primary:1; /* this is primary */
u8 handler:1; /* did we register a rx_netdev_handler */
/* each thing blocking bonding will increment this value by one.
* If this value is zero, then bonding is allowed.
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0c4ec9264071..d126f4cb3ba8 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1522,6 +1522,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
ring->netdev = vsi->netdev;
ring->dev = dev;
ring->count = vsi->num_rx_desc;
+ ring->cached_phctime = pf->ptp.cached_phc_time;
WRITE_ONCE(vsi->rx_rings[i], ring);
}
@@ -1562,6 +1563,22 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
}
/**
+ * ice_vsi_cfg_crc_strip - Configure CRC stripping for a VSI
+ * @vsi: VSI to be configured
+ * @disable: set to true to have FCS / CRC in the frame data
+ */
+void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable)
+{
+ int i;
+
+ ice_for_each_rxq(vsi, i)
+ if (disable)
+ vsi->rx_rings[i]->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS;
+ else
+ vsi->rx_rings[i]->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS;
+}
+
+/**
* ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
* @vsi: VSI to be configured
*/
@@ -3276,6 +3293,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
*/
if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
ice_vsi_cfg_rss_lut_key(vsi);
+
+ /* disable or enable CRC stripping */
+ if (vsi->netdev)
+ ice_vsi_cfg_crc_strip(vsi, !!(vsi->netdev->features &
+ NETIF_F_RXFCS));
+
break;
case ICE_VSI_VF:
ret = ice_vsi_alloc_q_vectors(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 8712b1d2ceec..ec4bf0c89857 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -89,6 +89,8 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable);
+
void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes);
void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 173fe6c31341..14edf7614406 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3385,6 +3385,11 @@ static void ice_set_netdev_features(struct net_device *netdev)
if (is_dvm_ena)
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
NETIF_F_HW_VLAN_STAG_TX;
+
+ /* Leave CRC / FCS stripping enabled by default, but allow the value to
+ * be changed at runtime
+ */
+ netdev->hw_features |= NETIF_F_RXFCS;
}
/**
@@ -4676,8 +4681,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_set_safe_mode_caps(hw);
}
- hw->ucast_shared = true;
-
err = ice_init_pf(pf);
if (err) {
dev_err(dev, "ice_init_pf failed: %d\n", err);
@@ -5736,6 +5739,9 @@ ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
NETIF_F_HW_VLAN_STAG_RX | \
NETIF_F_HW_VLAN_STAG_TX)
+#define NETIF_VLAN_STRIPPING_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_STAG_RX)
+
#define NETIF_VLAN_FILTERING_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
NETIF_F_HW_VLAN_STAG_FILTER)
@@ -5822,6 +5828,14 @@ ice_fix_features(struct net_device *netdev, netdev_features_t features)
NETIF_F_HW_VLAN_STAG_TX);
}
+ if (!(netdev->features & NETIF_F_RXFCS) &&
+ (features & NETIF_F_RXFCS) &&
+ (features & NETIF_VLAN_STRIPPING_FEATURES) &&
+ !ice_vsi_has_non_zero_vlans(np->vsi)) {
+ netdev_warn(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
+ features &= ~NETIF_VLAN_STRIPPING_FEATURES;
+ }
+
return features;
}
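The updated ndo_fix_features logic above treats "keep the FCS/CRC" (NETIF_F_RXFCS) and hardware VLAN stripping as mutually exclusive unless VLANs are already configured. Below is a small self-contained sketch of that dependency rule; the feature bits and helper are simplified models, not the kernel's netdev_features_t API.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define F_RXFCS      (1u << 0)  /* keep FCS/CRC in received frames */
#define F_VLAN_STRIP (1u << 1)  /* hardware VLAN tag stripping */

/* Mirrors the rule in ice_fix_features(): when RXFCS is being turned on
 * while VLAN stripping is requested and no VLAN is configured, drop the
 * VLAN stripping bits instead of failing the request.
 */
uint32_t fix_features(uint32_t current, uint32_t wanted, bool has_nonzero_vlans)
{
        if (!(current & F_RXFCS) && (wanted & F_RXFCS) &&
            (wanted & F_VLAN_STRIP) && !has_nonzero_vlans)
                wanted &= ~F_VLAN_STRIP;

        return wanted;
}

int main(void)
{
        /* Enabling RXFCS with stripping requested and no VLANs configured:
         * stripping gets dropped.
         */
        assert(fix_features(0, F_RXFCS | F_VLAN_STRIP, false) == F_RXFCS);
        /* With VLANs present the combination is left for the later checks. */
        assert(fix_features(0, F_RXFCS | F_VLAN_STRIP, true) ==
               (F_RXFCS | F_VLAN_STRIP));
        return 0;
}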
@@ -5915,6 +5929,13 @@ ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
if (current_vlan_features ^ requested_vlan_features) {
+ if ((features & NETIF_F_RXFCS) &&
+ (features & NETIF_VLAN_STRIPPING_FEATURES)) {
+ dev_err(ice_pf_to_dev(vsi->back),
+ "To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
+ return -EIO;
+ }
+
err = ice_set_vlan_offload_features(vsi, features);
if (err)
return err;
@@ -5996,6 +6017,23 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
if (ret)
return ret;
+ /* Turn on receive of FCS aka CRC, and after setting this
+ * flag the packet data will have the 4 byte CRC appended
+ */
+ if (changed & NETIF_F_RXFCS) {
+ if ((features & NETIF_F_RXFCS) &&
+ (features & NETIF_VLAN_STRIPPING_FEATURES)) {
+ dev_err(ice_pf_to_dev(vsi->back),
+ "To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
+ return -EIO;
+ }
+
+ ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS));
+ ret = ice_down_up(vsi);
+ if (ret)
+ return ret;
+ }
+
if (changed & NETIF_F_NTUPLE) {
bool ena = !!(features & NETIF_F_NTUPLE);
@@ -6700,6 +6738,31 @@ int ice_down(struct ice_vsi *vsi)
}
/**
+ * ice_down_up - shutdown the VSI connection and bring it up
+ * @vsi: the VSI to be reconnected
+ */
+int ice_down_up(struct ice_vsi *vsi)
+{
+ int ret;
+
+ /* if DOWN already set, nothing to do */
+ if (test_and_set_bit(ICE_VSI_DOWN, vsi->state))
+ return 0;
+
+ ret = ice_down(vsi);
+ if (ret)
+ return ret;
+
+ ret = ice_up(vsi);
+ if (ret) {
+ netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
* ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
* @vsi: VSI having resources allocated
*
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 72b663108a4a..5a2fd4d690f3 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -491,56 +491,6 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
}
/**
- * ice_ptp_update_cached_phctime - Update the cached PHC time values
- * @pf: Board specific private structure
- *
- * This function updates the system time values which are cached in the PF
- * structure and the Rx rings.
- *
- * This function must be called periodically to ensure that the cached value
- * is never more than 2 seconds old. It must also be called whenever the PHC
- * time has been changed.
- *
- * Return:
- * * 0 - OK, successfully updated
- * * -EAGAIN - PF was busy, need to reschedule the update
- */
-static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
-{
- u64 systime;
- int i;
-
- if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
- return -EAGAIN;
-
- /* Read the current PHC time */
- systime = ice_ptp_read_src_clk_reg(pf, NULL);
-
- /* Update the cached PHC time stored in the PF structure */
- WRITE_ONCE(pf->ptp.cached_phc_time, systime);
-
- ice_for_each_vsi(pf, i) {
- struct ice_vsi *vsi = pf->vsi[i];
- int j;
-
- if (!vsi)
- continue;
-
- if (vsi->type != ICE_VSI_PF)
- continue;
-
- ice_for_each_rxq(vsi, j) {
- if (!vsi->rx_rings[j])
- continue;
- WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
- }
- }
- clear_bit(ICE_CFG_BUSY, pf->state);
-
- return 0;
-}
-
-/**
* ice_ptp_extend_32b_ts - Convert a 32b nanoseconds timestamp to 64b
* @cached_phc_time: recently cached copy of PHC time
* @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
@@ -636,12 +586,404 @@ static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp)
static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
{
const u64 mask = GENMASK_ULL(31, 0);
+ unsigned long discard_time;
+
+ /* Discard the hardware timestamp if the cached PHC time is too old */
+ discard_time = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000);
+ if (time_is_before_jiffies(discard_time)) {
+ pf->ptp.tx_hwtstamp_discarded++;
+ return 0;
+ }
return ice_ptp_extend_32b_ts(pf->ptp.cached_phc_time,
(in_tstamp >> 8) & mask);
}
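The added check in ice_ptp_extend_40b_ts() refuses to extend a Tx timestamp once the cached PHC copy is more than two seconds old, because the 32-bit extension is only unambiguous inside a narrow window around the cached value. Below is a minimal sketch of that staleness gate using a jiffies-like tick counter; the names and tick rate are illustrative.

#include <stdbool.h>
#include <stdint.h>

#define TICK_HZ          100u              /* illustrative tick rate */
#define PHC_VALID_TICKS  (2u * TICK_HZ)    /* cached copy is trusted for ~2 s */

/* Models the discard_time test: the cached PHC time is stale once more
 * than the validity window has elapsed since it was captured.
 */
bool cached_phc_is_stale(uint64_t now_ticks, uint64_t cached_at_ticks)
{
        return now_ticks - cached_at_ticks > PHC_VALID_TICKS;
}

/* Models the caller's policy: drop the hardware timestamp (return 0 and
 * bump a discard counter) rather than extend it against a stale cache.
 */
uint64_t extend_or_discard(uint64_t now_ticks, uint64_t cached_at_ticks,
                           uint64_t extended_ts, uint64_t *discarded)
{
        if (cached_phc_is_stale(now_ticks, cached_at_ticks)) {
                (*discarded)++;
                return 0;
        }
        return extended_ts;
}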
/**
+ * ice_ptp_tx_tstamp_work - Process Tx timestamps for a port
+ * @work: pointer to the kthread_work struct
+ *
+ * Process timestamps captured by the PHY associated with this port. To do
+ * this, loop over each index with a waiting skb.
+ *
+ * If a given index has a valid timestamp, perform the following steps:
+ *
+ * 1) copy the timestamp out of the PHY register
+ * 2) clear the timestamp valid bit in the PHY register
+ * 3) unlock the index by clearing the associated in_use bit.
+ * 4) extend the 40b timestamp value to get a 64bit timestamp
+ * 5) send that timestamp to the stack
+ *
+ * After looping, if we still have waiting SKBs, then re-queue the work. This
+ * may cause us to effectively poll even when not strictly necessary. We do this
+ * because it's possible a new timestamp was requested around the same time as
+ * the interrupt. In some cases hardware might not interrupt us again when the
+ * timestamp is captured.
+ *
+ * Note that we only take the tracking lock when clearing the bit and when
+ * checking if we need to re-queue this task. The only place where bits can be
+ * set is the hard xmit routine where an SKB has a request flag set. The only
+ * places where we clear bits are this work function, or the periodic cleanup
+ * thread. If the cleanup thread clears a bit we're processing we catch it
+ * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread
+ * starts a new timestamp, we might not begin processing it right away but we
+ * will notice it at the end when we re-queue the work item. If a Tx thread
+ * starts a new timestamp just after this function exits without re-queuing,
+ * the interrupt when the timestamp finishes should trigger. Avoiding holding
+ * the lock for the entire function is important in order to ensure that Tx
+ * threads do not get blocked while waiting for the lock.
+ */
+static void ice_ptp_tx_tstamp_work(struct kthread_work *work)
+{
+ struct ice_ptp_port *ptp_port;
+ struct ice_ptp_tx *tx;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ u8 idx;
+
+ tx = container_of(work, struct ice_ptp_tx, work);
+ if (!tx->init)
+ return;
+
+ ptp_port = container_of(tx, struct ice_ptp_port, tx);
+ pf = ptp_port_to_pf(ptp_port);
+ hw = &pf->hw;
+
+ for_each_set_bit(idx, tx->in_use, tx->len) {
+ struct skb_shared_hwtstamps shhwtstamps = {};
+ u8 phy_idx = idx + tx->quad_offset;
+ u64 raw_tstamp, tstamp;
+ struct sk_buff *skb;
+ int err;
+
+ ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx);
+
+ err = ice_read_phy_tstamp(hw, tx->quad, phy_idx,
+ &raw_tstamp);
+ if (err)
+ continue;
+
+ ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx);
+
+ /* Check if the timestamp is invalid or stale */
+ if (!(raw_tstamp & ICE_PTP_TS_VALID) ||
+ raw_tstamp == tx->tstamps[idx].cached_tstamp)
+ continue;
+
+ /* The timestamp is valid, so we'll go ahead and clear this
+ * index and then send the timestamp up to the stack.
+ */
+ spin_lock(&tx->lock);
+ tx->tstamps[idx].cached_tstamp = raw_tstamp;
+ clear_bit(idx, tx->in_use);
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
+ spin_unlock(&tx->lock);
+
+ /* it's (unlikely but) possible we raced with the cleanup
+ * thread for discarding old timestamp requests.
+ */
+ if (!skb)
+ continue;
+
+ /* Extend the timestamp using cached PHC time */
+ tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp);
+ if (tstamp) {
+ shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+ ice_trace(tx_tstamp_complete, skb, idx);
+ }
+
+ skb_tstamp_tx(skb, &shhwtstamps);
+ dev_kfree_skb_any(skb);
+ }
+
+ /* Check if we still have work to do. If so, re-queue this task to
+ * poll for remaining timestamps.
+ */
+ spin_lock(&tx->lock);
+ if (!bitmap_empty(tx->in_use, tx->len))
+ kthread_queue_work(pf->ptp.kworker, &tx->work);
+ spin_unlock(&tx->lock);
+}
+
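The comment block above stresses that tx->lock is held only around the bit clear and the skb hand-off, never across the PHY register reads, so transmitting threads requesting a new timestamp slot are never blocked for long. Below is a compact sketch of that narrow-critical-section pattern with a pthread mutex standing in for the spinlock; the tracker layout is illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

#define SLOTS 64

struct tracker {
        pthread_mutex_t lock;   /* stands in for tx->lock */
        bool in_use[SLOTS];     /* stands in for the in_use bitmap */
        void *skb[SLOTS];       /* waiting packet per slot, if any */
};

/* Claim a completed slot: lock only long enough to clear in_use and
 * detach the skb pointer, exactly as the work function does after it
 * has already read and validated the timestamp outside the lock.
 */
void *claim_completed_slot(struct tracker *t, size_t idx)
{
        void *skb;

        pthread_mutex_lock(&t->lock);
        t->in_use[idx] = false;
        skb = t->skb[idx];
        t->skb[idx] = NULL;
        pthread_mutex_unlock(&t->lock);

        /* May be NULL if the periodic cleanup thread raced us. */
        return skb;
}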
+/**
+ * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps
+ * @tx: Tx tracking structure to initialize
+ *
+ * Assumes that the length has already been initialized. Do not call directly,
+ * use ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead.
+ */
+static int
+ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
+{
+ tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL);
+ if (!tx->tstamps)
+ return -ENOMEM;
+
+ tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
+ if (!tx->in_use) {
+ kfree(tx->tstamps);
+ tx->tstamps = NULL;
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&tx->lock);
+ kthread_init_work(&tx->work, ice_ptp_tx_tstamp_work);
+
+ tx->init = 1;
+
+ return 0;
+}
+
+/**
+ * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
+ * @pf: Board private structure
+ * @tx: the tracker to flush
+ */
+static void
+ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ u8 idx;
+
+ for (idx = 0; idx < tx->len; idx++) {
+ u8 phy_idx = idx + tx->quad_offset;
+
+ spin_lock(&tx->lock);
+ if (tx->tstamps[idx].skb) {
+ dev_kfree_skb_any(tx->tstamps[idx].skb);
+ tx->tstamps[idx].skb = NULL;
+ pf->ptp.tx_hwtstamp_flushed++;
+ }
+ clear_bit(idx, tx->in_use);
+ spin_unlock(&tx->lock);
+
+ /* Clear any potential residual timestamp in the PHY block */
+ if (!pf->hw.reset_ongoing)
+ ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx);
+ }
+}
+
+/**
+ * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
+ * @pf: Board private structure
+ * @tx: Tx tracking structure to release
+ *
+ * Free memory associated with the Tx timestamp tracker.
+ */
+static void
+ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ tx->init = 0;
+
+ kthread_cancel_work_sync(&tx->work);
+
+ ice_ptp_flush_tx_tracker(pf, tx);
+
+ kfree(tx->tstamps);
+ tx->tstamps = NULL;
+
+ bitmap_free(tx->in_use);
+ tx->in_use = NULL;
+
+ tx->len = 0;
+}
+
+/**
+ * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ * @port: the port this structure tracks
+ *
+ * Initialize the Tx timestamp tracker for this port. For generic MAC devices,
+ * the timestamp block is shared for all ports in the same quad. To avoid
+ * ports using the same timestamp index, logically break the block of
+ * registers into chunks based on the port number.
+ */
+static int
+ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
+{
+ tx->quad = port / ICE_PORTS_PER_QUAD;
+ tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
+ tx->len = INDEX_PER_PORT;
+
+ return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ *
+ * Initialize the Tx timestamp tracker for this PF. For E810 devices, each
+ * port has its own block of timestamps, independent of the other ports.
+ */
+static int
+ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ tx->quad = pf->hw.port_info->lport;
+ tx->quad_offset = 0;
+ tx->len = INDEX_PER_QUAD;
+
+ return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
+ * @pf: pointer to the PF struct
+ * @tx: PTP Tx tracker to clean up
+ *
+ * Loop through the Tx timestamp requests and see if any of them have been
+ * waiting for a long time. Discard any SKBs that have been waiting for more
+ * than 2 seconds. This is long enough to be reasonably sure that the
+ * timestamp will never be captured. This might happen if the packet gets
+ * discarded before it reaches the PHY timestamping block.
+ */
+static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+ struct ice_hw *hw = &pf->hw;
+ u8 idx;
+
+ if (!tx->init)
+ return;
+
+ for_each_set_bit(idx, tx->in_use, tx->len) {
+ struct sk_buff *skb;
+ u64 raw_tstamp;
+
+ /* Check if this SKB has been waiting for too long */
+ if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
+ continue;
+
+ /* Read tstamp to be able to use this register again */
+ ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
+ &raw_tstamp);
+
+ spin_lock(&tx->lock);
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
+ clear_bit(idx, tx->in_use);
+ spin_unlock(&tx->lock);
+
+ /* Count the number of Tx timestamps which have timed out */
+ pf->ptp.tx_hwtstamp_timeouts++;
+
+ /* Free the SKB after we've cleared the bit */
+ dev_kfree_skb_any(skb);
+ }
+}
+
+/**
+ * ice_ptp_update_cached_phctime - Update the cached PHC time values
+ * @pf: Board specific private structure
+ *
+ * This function updates the system time values which are cached in the PF
+ * structure and the Rx rings.
+ *
+ * This function must be called periodically to ensure that the cached value
+ * is never more than 2 seconds old.
+ *
+ * Note that the cached copy in the PF PTP structure is always updated, even
+ * if we can't update the copy in the Rx rings.
+ *
+ * Return:
+ * * 0 - OK, successfully updated
+ * * -EAGAIN - PF was busy, need to reschedule the update
+ */
+static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ unsigned long update_before;
+ u64 systime;
+ int i;
+
+ update_before = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000);
+ if (pf->ptp.cached_phc_time &&
+ time_is_before_jiffies(update_before)) {
+ unsigned long time_taken = jiffies - pf->ptp.cached_phc_jiffies;
+
+ dev_warn(dev, "%u msecs passed between updates to cached PHC time\n",
+ jiffies_to_msecs(time_taken));
+ pf->ptp.late_cached_phc_updates++;
+ }
+
+ /* Read the current PHC time */
+ systime = ice_ptp_read_src_clk_reg(pf, NULL);
+
+ /* Update the cached PHC time stored in the PF structure */
+ WRITE_ONCE(pf->ptp.cached_phc_time, systime);
+ WRITE_ONCE(pf->ptp.cached_phc_jiffies, jiffies);
+
+ if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+ return -EAGAIN;
+
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vsi = pf->vsi[i];
+ int j;
+
+ if (!vsi)
+ continue;
+
+ if (vsi->type != ICE_VSI_PF)
+ continue;
+
+ ice_for_each_rxq(vsi, j) {
+ if (!vsi->rx_rings[j])
+ continue;
+ WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
+ }
+ }
+ clear_bit(ICE_CFG_BUSY, pf->state);
+
+ return 0;
+}
+
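The reworked update function publishes the fresh PHC reading with WRITE_ONCE() into the PF structure and into every PF Rx ring, and records the jiffies of the update so late refreshes can be counted. Consumers such as the Rx timestamp path below read the ring copy back with READ_ONCE() and skip timestamping while it is still zero. Below is a userspace sketch of that publish/consume shape using C11 relaxed atomics in place of READ_ONCE/WRITE_ONCE; the ring layout is illustrative.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define NUM_RINGS 4

struct rx_ring {
        _Atomic uint64_t cached_phctime;   /* 0 means "no valid cache yet" */
};

struct pf {
        _Atomic uint64_t cached_phc_time;
        struct rx_ring rings[NUM_RINGS];
};

/* Publish one PHC reading to the PF copy and every ring copy, mirroring
 * ice_ptp_update_cached_phctime().
 */
void publish_cached_phc(struct pf *pf, uint64_t systime)
{
        atomic_store_explicit(&pf->cached_phc_time, systime, memory_order_relaxed);
        for (size_t i = 0; i < NUM_RINGS; i++)
                atomic_store_explicit(&pf->rings[i].cached_phctime, systime,
                                      memory_order_relaxed);
}

/* Consume on the Rx path: a zero value means timestamps cannot be
 * extended yet, mirroring the early return in ice_ptp_rx_hwtstamp().
 */
uint64_t rx_cached_phc_or_zero(struct rx_ring *ring)
{
        return atomic_load_explicit(&ring->cached_phctime, memory_order_relaxed);
}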
+/**
+ * ice_ptp_reset_cached_phctime - Reset cached PHC time after an update
+ * @pf: Board specific private structure
+ *
+ * This function must be called when the cached PHC time is no longer valid,
+ * such as after a time adjustment. It discards any outstanding Tx timestamps,
+ * and updates the cached PHC time for both the PF and Rx rings. If updating
+ * the PHC time cannot be done immediately, a warning message is logged and
+ * the work item is scheduled.
+ *
+ * These steps are required in order to ensure that we do not accidentally
+ * report a timestamp extended by the wrong PHC cached copy. Note that we
+ * do not directly update the cached timestamp here because it is possible
+ * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we
+ * would have to try again. During that time window, timestamps might be
+ * requested and returned with an invalid extension. Thus, on failure to
+ * immediately update the cached PHC time we would need to zero the value
+ * anyways. For this reason, we just zero the value immediately and queue the
+ * update work item.
+ */
+static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int err;
+
+ /* Update the cached PHC time immediately if possible, otherwise
+ * schedule the work item to execute soon.
+ */
+ err = ice_ptp_update_cached_phctime(pf);
+ if (err) {
+ /* If another thread is updating the Rx rings, we won't
+ * properly reset them here. This could lead to reporting of
+ * invalid timestamps, but there isn't much we can do.
+ */
+ dev_warn(dev, "%s: ICE_CFG_BUSY, unable to immediately update cached PHC time\n",
+ __func__);
+
+ /* Queue the work item to update the Rx rings when possible */
+ kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work,
+ msecs_to_jiffies(10));
+ }
+
+ /* Flush any outstanding Tx timestamps */
+ ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx);
+}
+
+/**
* ice_ptp_read_time - Read the time from the device
* @pf: Board private structure
* @ts: timespec structure to hold the current time value
@@ -1509,7 +1851,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
ice_ptp_unlock(hw);
if (!err)
- ice_ptp_update_cached_phctime(pf);
+ ice_ptp_reset_cached_phctime(pf);
/* Reenable periodic outputs */
ice_ptp_enable_all_clkout(pf);
@@ -1588,7 +1930,7 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
return err;
}
- ice_ptp_update_cached_phctime(pf);
+ ice_ptp_reset_cached_phctime(pf);
return 0;
}
@@ -1796,26 +2138,31 @@ void
ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
{
+ struct skb_shared_hwtstamps *hwtstamps;
+ u64 ts_ns, cached_time;
u32 ts_high;
- u64 ts_ns;
- /* Populate timesync data into skb */
- if (rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID) {
- struct skb_shared_hwtstamps *hwtstamps;
+ if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID))
+ return;
- /* Use ice_ptp_extend_32b_ts directly, using the ring-specific
- * cached PHC value, rather than accessing the PF. This also
- * allows us to simply pass the upper 32bits of nanoseconds
- * directly. Calling ice_ptp_extend_40b_ts is unnecessary as
- * it would just discard these bits itself.
- */
- ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
- ts_ns = ice_ptp_extend_32b_ts(rx_ring->cached_phctime, ts_high);
+ cached_time = READ_ONCE(rx_ring->cached_phctime);
- hwtstamps = skb_hwtstamps(skb);
- memset(hwtstamps, 0, sizeof(*hwtstamps));
- hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
- }
+ /* Do not report a timestamp if we don't have a cached PHC time */
+ if (!cached_time)
+ return;
+
+ /* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached
+ * PHC value, rather than accessing the PF. This also allows us to
+ * simply pass the upper 32bits of nanoseconds directly. Calling
+ * ice_ptp_extend_40b_ts is unnecessary as it would just discard these
+ * bits itself.
+ */
+ ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
+ ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high);
+
+ hwtstamps = skb_hwtstamps(skb);
+ memset(hwtstamps, 0, sizeof(*hwtstamps));
+ hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
}
/**
@@ -2016,112 +2363,6 @@ static long ice_ptp_create_clock(struct ice_pf *pf)
}
/**
- * ice_ptp_tx_tstamp_work - Process Tx timestamps for a port
- * @work: pointer to the kthread_work struct
- *
- * Process timestamps captured by the PHY associated with this port. To do
- * this, loop over each index with a waiting skb.
- *
- * If a given index has a valid timestamp, perform the following steps:
- *
- * 1) copy the timestamp out of the PHY register
- * 4) clear the timestamp valid bit in the PHY register
- * 5) unlock the index by clearing the associated in_use bit.
- * 2) extend the 40b timestamp value to get a 64bit timestamp
- * 3) send that timestamp to the stack
- *
- * After looping, if we still have waiting SKBs, then re-queue the work. This
- * may cause us effectively poll even when not strictly necessary. We do this
- * because it's possible a new timestamp was requested around the same time as
- * the interrupt. In some cases hardware might not interrupt us again when the
- * timestamp is captured.
- *
- * Note that we only take the tracking lock when clearing the bit and when
- * checking if we need to re-queue this task. The only place where bits can be
- * set is the hard xmit routine where an SKB has a request flag set. The only
- * places where we clear bits are this work function, or the periodic cleanup
- * thread. If the cleanup thread clears a bit we're processing we catch it
- * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread
- * starts a new timestamp, we might not begin processing it right away but we
- * will notice it at the end when we re-queue the work item. If a Tx thread
- * starts a new timestamp just after this function exits without re-queuing,
- * the interrupt when the timestamp finishes should trigger. Avoiding holding
- * the lock for the entire function is important in order to ensure that Tx
- * threads do not get blocked while waiting for the lock.
- */
-static void ice_ptp_tx_tstamp_work(struct kthread_work *work)
-{
- struct ice_ptp_port *ptp_port;
- struct ice_ptp_tx *tx;
- struct ice_pf *pf;
- struct ice_hw *hw;
- u8 idx;
-
- tx = container_of(work, struct ice_ptp_tx, work);
- if (!tx->init)
- return;
-
- ptp_port = container_of(tx, struct ice_ptp_port, tx);
- pf = ptp_port_to_pf(ptp_port);
- hw = &pf->hw;
-
- for_each_set_bit(idx, tx->in_use, tx->len) {
- struct skb_shared_hwtstamps shhwtstamps = {};
- u8 phy_idx = idx + tx->quad_offset;
- u64 raw_tstamp, tstamp;
- struct sk_buff *skb;
- int err;
-
- ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx);
-
- err = ice_read_phy_tstamp(hw, tx->quad, phy_idx,
- &raw_tstamp);
- if (err)
- continue;
-
- ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx);
-
- /* Check if the timestamp is invalid or stale */
- if (!(raw_tstamp & ICE_PTP_TS_VALID) ||
- raw_tstamp == tx->tstamps[idx].cached_tstamp)
- continue;
-
- /* The timestamp is valid, so we'll go ahead and clear this
- * index and then send the timestamp up to the stack.
- */
- spin_lock(&tx->lock);
- tx->tstamps[idx].cached_tstamp = raw_tstamp;
- clear_bit(idx, tx->in_use);
- skb = tx->tstamps[idx].skb;
- tx->tstamps[idx].skb = NULL;
- spin_unlock(&tx->lock);
-
- /* it's (unlikely but) possible we raced with the cleanup
- * thread for discarding old timestamp requests.
- */
- if (!skb)
- continue;
-
- /* Extend the timestamp using cached PHC time */
- tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp);
- shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
-
- ice_trace(tx_tstamp_complete, skb, idx);
-
- skb_tstamp_tx(skb, &shhwtstamps);
- dev_kfree_skb_any(skb);
- }
-
- /* Check if we still have work to do. If so, re-queue this task to
- * poll for remaining timestamps.
- */
- spin_lock(&tx->lock);
- if (!bitmap_empty(tx->in_use, tx->len))
- kthread_queue_work(pf->ptp.kworker, &tx->work);
- spin_unlock(&tx->lock);
-}
-
-/**
* ice_ptp_request_ts - Request an available Tx timestamp index
* @tx: the PTP Tx timestamp tracker to request from
* @skb: the SKB to associate with this timestamp request
@@ -2173,167 +2414,6 @@ void ice_ptp_process_ts(struct ice_pf *pf)
kthread_queue_work(pf->ptp.kworker, &pf->ptp.port.tx.work);
}
-/**
- * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps
- * @tx: Tx tracking structure to initialize
- *
- * Assumes that the length has already been initialized. Do not call directly,
- * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead.
- */
-static int
-ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
-{
- tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL);
- if (!tx->tstamps)
- return -ENOMEM;
-
- tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
- if (!tx->in_use) {
- kfree(tx->tstamps);
- tx->tstamps = NULL;
- return -ENOMEM;
- }
-
- spin_lock_init(&tx->lock);
- kthread_init_work(&tx->work, ice_ptp_tx_tstamp_work);
-
- tx->init = 1;
-
- return 0;
-}
-
-/**
- * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
- * @pf: Board private structure
- * @tx: the tracker to flush
- */
-static void
-ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
-{
- u8 idx;
-
- for (idx = 0; idx < tx->len; idx++) {
- u8 phy_idx = idx + tx->quad_offset;
-
- spin_lock(&tx->lock);
- if (tx->tstamps[idx].skb) {
- dev_kfree_skb_any(tx->tstamps[idx].skb);
- tx->tstamps[idx].skb = NULL;
- }
- clear_bit(idx, tx->in_use);
- spin_unlock(&tx->lock);
-
- /* Clear any potential residual timestamp in the PHY block */
- if (!pf->hw.reset_ongoing)
- ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx);
- }
-}
-
-/**
- * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
- * @pf: Board private structure
- * @tx: Tx tracking structure to release
- *
- * Free memory associated with the Tx timestamp tracker.
- */
-static void
-ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
-{
- tx->init = 0;
-
- kthread_cancel_work_sync(&tx->work);
-
- ice_ptp_flush_tx_tracker(pf, tx);
-
- kfree(tx->tstamps);
- tx->tstamps = NULL;
-
- bitmap_free(tx->in_use);
- tx->in_use = NULL;
-
- tx->len = 0;
-}
-
-/**
- * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps
- * @pf: Board private structure
- * @tx: the Tx tracking structure to initialize
- * @port: the port this structure tracks
- *
- * Initialize the Tx timestamp tracker for this port. For generic MAC devices,
- * the timestamp block is shared for all ports in the same quad. To avoid
- * ports using the same timestamp index, logically break the block of
- * registers into chunks based on the port number.
- */
-static int
-ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
-{
- tx->quad = port / ICE_PORTS_PER_QUAD;
- tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
- tx->len = INDEX_PER_PORT;
-
- return ice_ptp_alloc_tx_tracker(tx);
-}
-
-/**
- * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps
- * @pf: Board private structure
- * @tx: the Tx tracking structure to initialize
- *
- * Initialize the Tx timestamp tracker for this PF. For E810 devices, each
- * port has its own block of timestamps, independent of the other ports.
- */
-static int
-ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
-{
- tx->quad = pf->hw.port_info->lport;
- tx->quad_offset = 0;
- tx->len = INDEX_PER_QUAD;
-
- return ice_ptp_alloc_tx_tracker(tx);
-}
-
-/**
- * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
- * @hw: pointer to the hw struct
- * @tx: PTP Tx tracker to clean up
- *
- * Loop through the Tx timestamp requests and see if any of them have been
- * waiting for a long time. Discard any SKBs that have been waiting for more
- * than 2 seconds. This is long enough to be reasonably sure that the
- * timestamp will never be captured. This might happen if the packet gets
- * discarded before it reaches the PHY timestamping block.
- */
-static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx)
-{
- u8 idx;
-
- if (!tx->init)
- return;
-
- for_each_set_bit(idx, tx->in_use, tx->len) {
- struct sk_buff *skb;
- u64 raw_tstamp;
-
- /* Check if this SKB has been waiting for too long */
- if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
- continue;
-
- /* Read tstamp to be able to use this register again */
- ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
- &raw_tstamp);
-
- spin_lock(&tx->lock);
- skb = tx->tstamps[idx].skb;
- tx->tstamps[idx].skb = NULL;
- clear_bit(idx, tx->in_use);
- spin_unlock(&tx->lock);
-
- /* Free the SKB after we've cleared the bit */
- dev_kfree_skb_any(skb);
- }
-}
-
static void ice_ptp_periodic_work(struct kthread_work *work)
{
struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
@@ -2345,7 +2425,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
err = ice_ptp_update_cached_phctime(pf);
- ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx);
+ ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx);
/* Run twice a second or reschedule if phc update failed */
kthread_queue_delayed_work(ptp->kworker, &ptp->work,
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 10e396abf130..d53dcd03e36b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -163,6 +163,7 @@ struct ice_ptp_port {
* @work: delayed work function for periodic tasks
* @extts_work: work function for handling external Tx timestamps
* @cached_phc_time: a cached copy of the PHC time for timestamp extension
+ * @cached_phc_jiffies: jiffies when cached_phc_time was last updated
* @ext_ts_chan: the external timestamp channel in use
* @ext_ts_irq: the external timestamp IRQ in use
* @kworker: kwork thread for handling periodic work
@@ -171,12 +172,19 @@ struct ice_ptp_port {
* @clock: pointer to registered PTP clock device
* @tstamp_config: hardware timestamping configuration
* @reset_time: kernel time after clock stop on reset
+ * @tx_hwtstamp_skipped: number of Tx time stamp requests skipped
+ * @tx_hwtstamp_timeouts: number of Tx skbs discarded with no time stamp
+ * @tx_hwtstamp_flushed: number of Tx skbs flushed due to interface closed
+ * @tx_hwtstamp_discarded: number of Tx skbs discarded due to cached PHC time
+ * being too old to correctly extend timestamp
+ * @late_cached_phc_updates: number of times cached PHC update is late
*/
struct ice_ptp {
struct ice_ptp_port port;
struct kthread_delayed_work work;
struct kthread_work extts_work;
u64 cached_phc_time;
+ unsigned long cached_phc_jiffies;
u8 ext_ts_chan;
u8 ext_ts_irq;
struct kthread_worker *kworker;
@@ -185,6 +193,11 @@ struct ice_ptp {
struct ptp_clock *clock;
struct hwtstamp_config tstamp_config;
u64 reset_time;
+ u32 tx_hwtstamp_skipped;
+ u32 tx_hwtstamp_timeouts;
+ u32 tx_hwtstamp_flushed;
+ u32 tx_hwtstamp_discarded;
+ u32 late_cached_phc_updates;
};
#define __ptp_port_to_ptp(p) \
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 3808034f7e7e..697feb89188c 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -3449,31 +3449,15 @@ bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle)
* ice_add_mac - Add a MAC address based filter rule
* @hw: pointer to the hardware structure
* @m_list: list of MAC addresses and forwarding information
- *
- * IMPORTANT: When the ucast_shared flag is set to false and m_list has
- * multiple unicast addresses, the function assumes that all the
- * addresses are unique in a given add_mac call. It doesn't
- * check for duplicates in this case, removing duplicates from a given
- * list should be taken care of in the caller of this function.
*/
int ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
{
- struct ice_sw_rule_lkup_rx_tx *s_rule, *r_iter;
struct ice_fltr_list_entry *m_list_itr;
- struct list_head *rule_head;
- u16 total_elem_left, s_rule_size;
- struct ice_switch_info *sw;
- struct mutex *rule_lock; /* Lock to protect filter rule list */
- u16 num_unicast = 0;
int status = 0;
- u8 elem_sent;
if (!m_list || !hw)
return -EINVAL;
- s_rule = NULL;
- sw = hw->switch_info;
- rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
list_for_each_entry(m_list_itr, m_list, list_entry) {
u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
u16 vsi_handle;
@@ -3492,106 +3476,13 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC ||
is_zero_ether_addr(add))
return -EINVAL;
- if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
- /* Don't overwrite the unicast address */
- mutex_lock(rule_lock);
- if (ice_find_rule_entry(hw, ICE_SW_LKUP_MAC,
- &m_list_itr->fltr_info)) {
- mutex_unlock(rule_lock);
- return -EEXIST;
- }
- mutex_unlock(rule_lock);
- num_unicast++;
- } else if (is_multicast_ether_addr(add) ||
- (is_unicast_ether_addr(add) && hw->ucast_shared)) {
- m_list_itr->status =
- ice_add_rule_internal(hw, ICE_SW_LKUP_MAC,
- m_list_itr);
- if (m_list_itr->status)
- return m_list_itr->status;
- }
- }
-
- mutex_lock(rule_lock);
- /* Exit if no suitable entries were found for adding bulk switch rule */
- if (!num_unicast) {
- status = 0;
- goto ice_add_mac_exit;
- }
-
- rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
-
- /* Allocate switch rule buffer for the bulk update for unicast */
- s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule);
- s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
- GFP_KERNEL);
- if (!s_rule) {
- status = -ENOMEM;
- goto ice_add_mac_exit;
- }
-
- r_iter = s_rule;
- list_for_each_entry(m_list_itr, m_list, list_entry) {
- struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
- u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
-
- if (is_unicast_ether_addr(mac_addr)) {
- ice_fill_sw_rule(hw, &m_list_itr->fltr_info, r_iter,
- ice_aqc_opc_add_sw_rules);
- r_iter = (typeof(s_rule))((u8 *)r_iter + s_rule_size);
- }
- }
-
- /* Call AQ bulk switch rule update for all unicast addresses */
- r_iter = s_rule;
- /* Call AQ switch rule in AQ_MAX chunk */
- for (total_elem_left = num_unicast; total_elem_left > 0;
- total_elem_left -= elem_sent) {
- struct ice_sw_rule_lkup_rx_tx *entry = r_iter;
-
- elem_sent = min_t(u8, total_elem_left,
- (ICE_AQ_MAX_BUF_LEN / s_rule_size));
- status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
- elem_sent, ice_aqc_opc_add_sw_rules,
- NULL);
- if (status)
- goto ice_add_mac_exit;
- r_iter = (typeof(s_rule))
- ((u8 *)r_iter + (elem_sent * s_rule_size));
- }
-
- /* Fill up rule ID based on the value returned from FW */
- r_iter = s_rule;
- list_for_each_entry(m_list_itr, m_list, list_entry) {
- struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
- u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
- struct ice_fltr_mgmt_list_entry *fm_entry;
-
- if (is_unicast_ether_addr(mac_addr)) {
- f_info->fltr_rule_id = le16_to_cpu(r_iter->index);
- f_info->fltr_act = ICE_FWD_TO_VSI;
- /* Create an entry to track this MAC address */
- fm_entry = devm_kzalloc(ice_hw_to_dev(hw),
- sizeof(*fm_entry), GFP_KERNEL);
- if (!fm_entry) {
- status = -ENOMEM;
- goto ice_add_mac_exit;
- }
- fm_entry->fltr_info = *f_info;
- fm_entry->vsi_count = 1;
- /* The book keeping entries will get removed when
- * base driver calls remove filter AQ command
- */
- list_add(&fm_entry->list_entry, rule_head);
- r_iter = (typeof(s_rule))((u8 *)r_iter + s_rule_size);
- }
+ m_list_itr->status = ice_add_rule_internal(hw, ICE_SW_LKUP_MAC,
+ m_list_itr);
+ if (m_list_itr->status)
+ return m_list_itr->status;
}
-ice_add_mac_exit:
- mutex_unlock(rule_lock);
- if (s_rule)
- devm_kfree(ice_hw_to_dev(hw), s_rule);
return status;
}
@@ -3979,38 +3870,6 @@ ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle,
}
/**
- * ice_find_ucast_rule_entry - Search for a unicast MAC filter rule entry
- * @hw: pointer to the hardware structure
- * @recp_id: lookup type for which the specified rule needs to be searched
- * @f_info: rule information
- *
- * Helper function to search for a unicast rule entry - this is to be used
- * to remove unicast MAC filter that is not shared with other VSIs on the
- * PF switch.
- *
- * Returns pointer to entry storing the rule if found
- */
-static struct ice_fltr_mgmt_list_entry *
-ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id,
- struct ice_fltr_info *f_info)
-{
- struct ice_switch_info *sw = hw->switch_info;
- struct ice_fltr_mgmt_list_entry *list_itr;
- struct list_head *list_head;
-
- list_head = &sw->recp_list[recp_id].filt_rules;
- list_for_each_entry(list_itr, list_head, list_entry) {
- if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
- sizeof(f_info->l_data)) &&
- f_info->fwd_id.hw_vsi_id ==
- list_itr->fltr_info.fwd_id.hw_vsi_id &&
- f_info->flag == list_itr->fltr_info.flag)
- return list_itr;
- }
- return NULL;
-}
-
-/**
* ice_remove_mac - remove a MAC address based filter rule
* @hw: pointer to the hardware structure
* @m_list: list of MAC addresses and forwarding information
@@ -4026,15 +3885,12 @@ ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id,
int ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
{
struct ice_fltr_list_entry *list_itr, *tmp;
- struct mutex *rule_lock; /* Lock to protect filter rule list */
if (!m_list)
return -EINVAL;
- rule_lock = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) {
enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type;
- u8 *add = &list_itr->fltr_info.l_data.mac.mac_addr[0];
u16 vsi_handle;
if (l_type != ICE_SW_LKUP_MAC)
@@ -4046,19 +3902,7 @@ int ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
list_itr->fltr_info.fwd_id.hw_vsi_id =
ice_get_hw_vsi_num(hw, vsi_handle);
- if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
- /* Don't remove the unicast address that belongs to
- * another VSI on the switch, since it is not being
- * shared...
- */
- mutex_lock(rule_lock);
- if (!ice_find_ucast_rule_entry(hw, ICE_SW_LKUP_MAC,
- &list_itr->fltr_info)) {
- mutex_unlock(rule_lock);
- return -ENOENT;
- }
- mutex_unlock(rule_lock);
- }
+
list_itr->status = ice_remove_rule_internal(hw,
ICE_SW_LKUP_MAC,
list_itr);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 836dce840712..42b42f4b21ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -2255,8 +2255,10 @@ ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
/* Grab an open timestamp slot */
idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb);
- if (idx < 0)
+ if (idx < 0) {
+ tx_ring->vsi->back->ptp.tx_hwtstamp_skipped++;
return;
+ }
off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
(ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) |
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index ca902af54bb4..932b5661ec4d 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -295,10 +295,11 @@ struct ice_rx_ring {
struct xsk_buff_pool *xsk_pool;
struct sk_buff *skb;
dma_addr_t dma; /* physical address of ring */
-#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
u64 cached_phctime;
u8 dcb_tc; /* Traffic class of ring */
u8 ptp_rx;
+#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
+#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
u8 flags;
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 861b64322959..8651f6c735ba 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -885,8 +885,6 @@ struct ice_hw {
/* INTRL granularity in 1 us */
u8 intrl_gran;
- u8 ucast_shared; /* true if VSIs can share unicast addr */
-
#define ICE_PHY_PER_NAC 1
#define ICE_MAX_QUAD 2
#define ICE_NUM_QUAD_TYPE 2
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ebff0e04045d..bf6c461e1a2a 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2129,65 +2129,102 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
return ok;
}
-static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
- struct xdp_frame *xdpf,
- struct igc_ring *ring)
-{
- dma_addr_t dma;
-
- dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
- if (dma_mapping_error(ring->dev, dma)) {
- netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
- return -ENOMEM;
- }
-
- buffer->type = IGC_TX_BUFFER_TYPE_XDP;
- buffer->xdpf = xdpf;
- buffer->protocol = 0;
- buffer->bytecount = xdpf->len;
- buffer->gso_segs = 1;
- buffer->time_stamp = jiffies;
- dma_unmap_len_set(buffer, len, xdpf->len);
- dma_unmap_addr_set(buffer, dma, dma);
- return 0;
-}
-
/* This function requires __netif_tx_lock is held by the caller. */
static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
struct xdp_frame *xdpf)
{
- struct igc_tx_buffer *buffer;
- union igc_adv_tx_desc *desc;
- u32 cmd_type, olinfo_status;
- int err;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+ u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
+ u16 count, index = ring->next_to_use;
+ struct igc_tx_buffer *head = &ring->tx_buffer_info[index];
+ struct igc_tx_buffer *buffer = head;
+ union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index);
+ u32 olinfo_status, len = xdpf->len, cmd_type;
+ void *data = xdpf->data;
+ u16 i;
- if (!igc_desc_unused(ring))
- return -EBUSY;
+ count = TXD_USE_COUNT(len);
+ for (i = 0; i < nr_frags; i++)
+ count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i]));
- buffer = &ring->tx_buffer_info[ring->next_to_use];
- err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
- if (err)
- return err;
+ if (igc_maybe_stop_tx(ring, count + 3)) {
+ /* this is a hard error */
+ return -EBUSY;
+ }
- cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
- IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
- buffer->bytecount;
- olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+ i = 0;
+ head->bytecount = xdp_get_frame_len(xdpf);
+ head->type = IGC_TX_BUFFER_TYPE_XDP;
+ head->gso_segs = 1;
+ head->xdpf = xdpf;
- desc = IGC_TX_DESC(ring, ring->next_to_use);
- desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
desc->read.olinfo_status = cpu_to_le32(olinfo_status);
- desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
- netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
+ for (;;) {
+ dma_addr_t dma;
- buffer->next_to_watch = desc;
+ dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(ring->dev, dma)) {
+ netdev_err_once(ring->netdev,
+ "Failed to map DMA for TX\n");
+ goto unmap;
+ }
- ring->next_to_use++;
- if (ring->next_to_use == ring->count)
- ring->next_to_use = 0;
+ dma_unmap_len_set(buffer, len, len);
+ dma_unmap_addr_set(buffer, dma, dma);
+
+ cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+ IGC_ADVTXD_DCMD_IFCS | len;
+
+ desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ desc->read.buffer_addr = cpu_to_le64(dma);
+
+ buffer->protocol = 0;
+
+ if (++index == ring->count)
+ index = 0;
+
+ if (i == nr_frags)
+ break;
+
+ buffer = &ring->tx_buffer_info[index];
+ desc = IGC_TX_DESC(ring, index);
+ desc->read.olinfo_status = 0;
+
+ data = skb_frag_address(&sinfo->frags[i]);
+ len = skb_frag_size(&sinfo->frags[i]);
+ i++;
+ }
+ desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD);
+
+ netdev_tx_sent_queue(txring_txq(ring), head->bytecount);
+ /* set the timestamp */
+ head->time_stamp = jiffies;
+ /* set next_to_watch value indicating a packet is present */
+ head->next_to_watch = desc;
+ ring->next_to_use = index;
return 0;
+
+unmap:
+ for (;;) {
+ buffer = &ring->tx_buffer_info[index];
+ if (dma_unmap_len(buffer, len))
+ dma_unmap_page(ring->dev,
+ dma_unmap_addr(buffer, dma),
+ dma_unmap_len(buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(buffer, len, 0);
+ if (buffer == head)
+ break;
+
+ if (!index)
+ index += ring->count;
+ index--;
+ }
+
+ return -ENOMEM;
}
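The rewritten igc_xdp_init_tx_descriptor() now sizes the descriptor budget for the head buffer plus every frag, maps each piece separately, and unwinds all prior mappings if any map fails so the ring never holds a half-built frame. Below is a minimal sketch of that count/map/rollback shape with the DMA and descriptor details stubbed out; the types and limits are illustrative.

#include <stdbool.h>
#include <stddef.h>

#define MAX_PIECES 17   /* head buffer plus up to 16 frags, illustrative */

struct piece {
        void *data;
        size_t len;
};

/* Stubs standing in for dma_map_single() / dma_unmap_page(). */
static bool map_piece(struct piece *p)   { return p->data != NULL; }
static void unmap_piece(struct piece *p) { (void)p; }

/* Map every piece of a multi-buffer frame; on failure, unmap the pieces
 * already mapped (mirroring the unmap: label above) so nothing is left
 * half-initialized.
 */
int map_frame(struct piece *pieces, size_t count)
{
        size_t i;

        for (i = 0; i < count; i++) {
                if (!map_piece(&pieces[i]))
                        goto unwind;
        }
        return 0;

unwind:
        while (i--)
                unmap_piece(&pieces[i]);
        return -1;
}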
static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
@@ -2369,6 +2406,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
igc_rx_offset(rx_ring) + pkt_offset,
size, true);
+ xdp_buff_clear_frags_flag(&xdp);
skb = igc_xdp_run_prog(adapter, &xdp);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 04f453eabef6..cb5c707538a5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1964,15 +1964,13 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer,
frame_size >>= 1;
- data = kmap(rx_buffer->page) + rx_buffer->page_offset;
+ data = page_address(rx_buffer->page) + rx_buffer->page_offset;
if (data[3] != 0xFF ||
data[frame_size + 10] != 0xBE ||
data[frame_size + 12] != 0xAF)
match = false;
- kunmap(rx_buffer->page);
-
return match;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 7f7ea468ffa9..2b00db92b08f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3712,7 +3712,9 @@ struct ixgbe_info {
#define IXGBE_KRM_LINK_S1(P) ((P) ? 0x8200 : 0x4200)
#define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C)
#define IXGBE_KRM_AN_CNTL_1(P) ((P) ? 0x822C : 0x422C)
+#define IXGBE_KRM_AN_CNTL_4(P) ((P) ? 0x8238 : 0x4238)
#define IXGBE_KRM_AN_CNTL_8(P) ((P) ? 0x8248 : 0x4248)
+#define IXGBE_KRM_PCS_KX_AN(P) ((P) ? 0x9918 : 0x5918)
#define IXGBE_KRM_SGMII_CTRL(P) ((P) ? 0x82A0 : 0x42A0)
#define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C)
#define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634)
@@ -3722,6 +3724,7 @@ struct ixgbe_info {
#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054)
#define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520)
#define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 0x9A00 : 0x5A00)
+#define IXGBE_KRM_FLX_TMRS_CTRL_ST31(P) ((P) ? 0x9180 : 0x5180)
#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA ~(0x3 << 20)
#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR BIT(20)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 35c2b9b8bd19..aa4bf6c9a2f7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1721,9 +1721,59 @@ static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
return IXGBE_ERR_LINK_SETUP;
}
- status = mac->ops.write_iosf_sb_reg(hw,
- IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
- IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ (void)mac->ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ /* change mode enforcement rules to hybrid */
+ (void)mac->ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_FLX_TMRS_CTRL_ST31(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ reg_val |= 0x0400;
+
+ (void)mac->ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_FLX_TMRS_CTRL_ST31(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ /* manually control the config */
+ (void)mac->ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ reg_val |= 0x20002240;
+
+ (void)mac->ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ /* move the AN base page values */
+ (void)mac->ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_PCS_KX_AN(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ reg_val |= 0x1;
+
+ (void)mac->ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_PCS_KX_AN(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ /* set the AN37 over CB mode */
+ (void)mac->ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_AN_CNTL_4(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ reg_val |= 0x20000000;
+
+ (void)mac->ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_AN_CNTL_4(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ /* restart AN manually */
+ (void)mac->ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+
+ (void)mac->ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
/* Toggle port SW reset by AN reset. */
status = ixgbe_restart_an_internal_phy_x550em(hw);
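Each of the blocks added above is the same read-modify-write sequence against a different KRM PHY register: read it over the IOSF sideband, OR in a fixed bit pattern, write it back. Below is a hedged sketch of one such step in isolation, here with the read error checked rather than cast away; the function-pointer types are simplified stand-ins for mac->ops.read_iosf_sb_reg()/write_iosf_sb_reg(), not the driver's real signatures.

#include <stdint.h>

typedef int (*krm_read_fn)(uint32_t reg, uint32_t *val);
typedef int (*krm_write_fn)(uint32_t reg, uint32_t val);

/* One KRM read-modify-write step: read the register, OR in the requested
 * bits, write the result back. Every block in the hunk above is an
 * instance of this with a different register offset and mask.
 */
int krm_rmw_or(krm_read_fn read_reg, krm_write_fn write_reg,
               uint32_t reg, uint32_t set_bits)
{
        uint32_t val = 0;
        int err;

        err = read_reg(reg, &val);
        if (err)
                return err;

        val |= set_bits;
        return write_reg(reg, val);
}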
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index 2f84d0fb4094..e5a4381a88b3 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -367,6 +367,8 @@ int prestera_port_learning_set(struct prestera_port *port, bool learn_enable);
int prestera_port_uc_flood_set(struct prestera_port *port, bool flood);
int prestera_port_mc_flood_set(struct prestera_port *port, bool flood);
+int prestera_port_br_locked_set(struct prestera_port *port, bool br_locked);
+
int prestera_port_pvid_set(struct prestera_port *port, u16 vid);
bool prestera_netdev_check(const struct net_device *dev);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 962d7e0c0cb5..1a68a888d587 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -101,6 +101,7 @@ enum {
PRESTERA_CMD_PORT_ATTR_LEARNING = 7,
PRESTERA_CMD_PORT_ATTR_FLOOD = 8,
PRESTERA_CMD_PORT_ATTR_CAPABILITY = 9,
+ PRESTERA_CMD_PORT_ATTR_LOCKED = 10,
PRESTERA_CMD_PORT_ATTR_PHY_MODE = 12,
PRESTERA_CMD_PORT_ATTR_TYPE = 13,
PRESTERA_CMD_PORT_ATTR_STATS = 17,
@@ -285,6 +286,7 @@ union prestera_msg_port_param {
u8 duplex;
u8 fec;
u8 fc;
+ u8 br_locked;
union {
struct {
u8 admin;
@@ -745,6 +747,7 @@ static void prestera_hw_build_tests(void)
BUILD_BUG_ON(sizeof(struct prestera_msg_rif_resp) != 12);
BUILD_BUG_ON(sizeof(struct prestera_msg_vr_resp) != 12);
BUILD_BUG_ON(sizeof(struct prestera_msg_policer_resp) != 12);
+ BUILD_BUG_ON(sizeof(struct prestera_msg_flood_domain_create_resp) != 12);
/* check events */
BUILD_BUG_ON(sizeof(struct prestera_msg_event_port) != 20);
@@ -1639,6 +1642,22 @@ int prestera_hw_port_mc_flood_set(const struct prestera_port *port, bool flood)
&req.cmd, sizeof(req));
}
+int prestera_hw_port_br_locked_set(const struct prestera_port *port,
+ bool br_locked)
+{
+ struct prestera_msg_port_attr_req req = {
+ .attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_LOCKED),
+ .port = __cpu_to_le32(port->hw_id),
+ .dev = __cpu_to_le32(port->dev_id),
+ .param = {
+ .br_locked = br_locked,
+ }
+ };
+
+ return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+ &req.cmd, sizeof(req));
+}
+
int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid)
{
struct prestera_msg_vlan_req req = {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index 56e043146dd2..4aca43e72a05 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -183,6 +183,8 @@ int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed);
int prestera_hw_port_learning_set(struct prestera_port *port, bool enable);
int prestera_hw_port_uc_flood_set(const struct prestera_port *port, bool flood);
int prestera_hw_port_mc_flood_set(const struct prestera_port *port, bool flood);
+int prestera_hw_port_br_locked_set(const struct prestera_port *port,
+ bool br_locked);
int prestera_hw_port_accept_frm_type(struct prestera_port *port,
enum prestera_accept_frm_type type);
/* Vlan API */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index ede3e53b9790..3956d6d5df3c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -51,6 +51,11 @@ int prestera_port_mc_flood_set(struct prestera_port *port, bool flood)
return prestera_hw_port_mc_flood_set(port, flood);
}
+int prestera_port_br_locked_set(struct prestera_port *port, bool br_locked)
+{
+ return prestera_hw_port_br_locked_set(port, br_locked);
+}
+
int prestera_port_pvid_set(struct prestera_port *port, u16 vid)
{
enum prestera_accept_frm_type frm_type;
@@ -797,32 +802,30 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
caching_dw = &port->cached_hw_stats.caching_dw;
- if (port->phy_link) {
- memset(&smac, 0, sizeof(smac));
- smac.valid = true;
- smac.oper = pevt->data.mac.oper;
- if (smac.oper) {
- smac.mode = pevt->data.mac.mode;
- smac.speed = pevt->data.mac.speed;
- smac.duplex = pevt->data.mac.duplex;
- smac.fc = pevt->data.mac.fc;
- smac.fec = pevt->data.mac.fec;
- phylink_mac_change(port->phy_link, true);
- } else {
- phylink_mac_change(port->phy_link, false);
- }
- prestera_port_mac_state_cache_write(port, &smac);
+ memset(&smac, 0, sizeof(smac));
+ smac.valid = true;
+ smac.oper = pevt->data.mac.oper;
+ if (smac.oper) {
+ smac.mode = pevt->data.mac.mode;
+ smac.speed = pevt->data.mac.speed;
+ smac.duplex = pevt->data.mac.duplex;
+ smac.fc = pevt->data.mac.fc;
+ smac.fec = pevt->data.mac.fec;
}
+ prestera_port_mac_state_cache_write(port, &smac);
if (port->state_mac.oper) {
- if (!port->phy_link)
+ if (port->phy_link)
+ phylink_mac_change(port->phy_link, true);
+ else
netif_carrier_on(port->dev);
if (!delayed_work_pending(caching_dw))
queue_delayed_work(prestera_wq, caching_dw, 0);
- } else if (netif_running(port->dev) &&
- netif_carrier_ok(port->dev)) {
- if (!port->phy_link)
+ } else {
+ if (port->phy_link)
+ phylink_mac_change(port->phy_link, false);
+ else if (netif_running(port->dev) && netif_carrier_ok(port->dev))
netif_carrier_off(port->dev);
if (delayed_work_pending(caching_dw))
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 71cde97d85c8..e548cd32582e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -143,6 +143,7 @@ prestera_br_port_flags_reset(struct prestera_bridge_port *br_port,
prestera_port_uc_flood_set(port, false);
prestera_port_mc_flood_set(port, false);
prestera_port_learning_set(port, false);
+ prestera_port_br_locked_set(port, false);
}
static int prestera_br_port_flags_set(struct prestera_bridge_port *br_port,
@@ -162,6 +163,11 @@ static int prestera_br_port_flags_set(struct prestera_bridge_port *br_port,
if (err)
goto err_out;
+ err = prestera_port_br_locked_set(port,
+ br_port->flags & BR_PORT_LOCKED);
+ if (err)
+ goto err_out;
+
return 0;
err_out:
@@ -1163,7 +1169,7 @@ static int prestera_port_obj_attr_set(struct net_device *dev, const void *ctx,
break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
if (attr->u.brport_flags.mask &
- ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
+ ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_PORT_LOCKED))
err = -EINVAL;
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
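Taken together, the prestera hunks offload the bridge locked-port feature: BR_PORT_LOCKED is now accepted in the PRE_BRIDGE_FLAGS mask check, prestera_br_port_flags_set() forwards it alongside learning and flooding, and prestera_hw_port_br_locked_set() encodes it as port attribute 10 in the usual attr-set request. Once offloaded, the flag is toggled from userspace with the normal bridge tooling (roughly, bridge link set dev <port> locked on). As a hypothetical sketch only, a further boolean port attribute would be wired up with the same request layout; PRESTERA_CMD_PORT_ATTR_FOO and the "foo" field below do not exist in the driver:

static int prestera_hw_port_foo_set(const struct prestera_port *port, bool foo)
{
	struct prestera_msg_port_attr_req req = {
		.attr = __cpu_to_le32(PRESTERA_CMD_PORT_ATTR_FOO),
		.port = __cpu_to_le32(port->hw_id),
		.dev = __cpu_to_le32(port->dev_id),
		.param = {
			.foo = foo,	/* hypothetical u8 member of the param union */
		}
	};

	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
			    &req.cmd, sizeof(req));
}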
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 5ace4609de47..96c856ceb0f9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1573,8 +1573,8 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf,
.last = !xdp_frame_has_frags(xdpf),
};
int err, index = 0, n_desc = 1, nr_frags;
- struct mtk_tx_dma *htxd, *txd, *txd_pdma;
struct mtk_tx_buf *htx_buf, *tx_buf;
+ struct mtk_tx_dma *htxd, *txd;
void *data = xdpf->data;
if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
@@ -1608,7 +1608,6 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf,
if (MTK_HAS_CAPS(soc->caps, MTK_QDMA) || (index & 0x1)) {
txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
- txd_pdma = qdma_to_pdma(ring, txd);
if (txd == ring->last_free)
goto unmap;
@@ -1629,7 +1628,8 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf,
htx_buf->data = xdpf;
if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) {
- txd_pdma = qdma_to_pdma(ring, txd);
+ struct mtk_tx_dma *txd_pdma = qdma_to_pdma(ring, txd);
+
if (index & 1)
txd_pdma->txd2 |= TX_DMA_LS0;
else
@@ -1660,13 +1660,15 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf,
unmap:
while (htxd != txd) {
- txd_pdma = qdma_to_pdma(ring, htxd);
tx_buf = mtk_desc_to_tx_buf(ring, htxd, soc->txrx.txd_size);
mtk_tx_unmap(eth, tx_buf, NULL, false);
htxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
- if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA))
+ if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) {
+ struct mtk_tx_dma *txd_pdma = qdma_to_pdma(ring, htxd);
+
txd_pdma->txd2 = TX_DMA_DESP2_DEF;
+ }
htxd = mtk_qdma_phys_to_virt(ring, htxd->txd2);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a560df446bac..e464024481b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -856,11 +856,6 @@ enum {
MLX5E_STATE_XDP_ACTIVE,
};
-enum {
- MLX5E_TC_PRIO = 0,
- MLX5E_NIC_PRIO
-};
-
struct mlx5e_modify_sq_param {
int curr_state;
int next_state;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 9b8cdf2e68ad..bf2741eb7f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -8,6 +8,7 @@
#include "lib/fs_ttc.h"
struct mlx5e_post_act;
+struct mlx5e_tc_table;
enum {
MLX5E_TC_FT_LEVEL = 0,
@@ -15,6 +16,11 @@ enum {
MLX5E_TC_MISS_LEVEL,
};
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
+};
+
struct mlx5e_flow_table {
int num_groups;
struct mlx5_flow_table *t;
@@ -83,54 +89,28 @@ enum {
#endif
};
-struct mlx5e_priv;
-
-#ifdef CONFIG_MLX5_EN_RXNFC
-
-struct mlx5e_ethtool_table {
- struct mlx5_flow_table *ft;
- int num_rules;
-};
-
-#define ETHTOOL_NUM_L3_L4_FTS 7
-#define ETHTOOL_NUM_L2_FTS 4
-
-struct mlx5e_ethtool_steering {
- struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
- struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
- struct list_head rules;
- int tot_num_rules;
-};
-
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
-int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
-int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
- struct ethtool_rxnfc *info, u32 *rule_locs);
-#else
-static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { }
-static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
-static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
-{ return -EOPNOTSUPP; }
-static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
- struct ethtool_rxnfc *info, u32 *rule_locs)
-{ return -EOPNOTSUPP; }
-#endif /* CONFIG_MLX5_EN_RXNFC */
+struct mlx5e_flow_steering;
+struct mlx5e_rx_res;
#ifdef CONFIG_MLX5_EN_ARFS
struct mlx5e_arfs_tables;
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
-int mlx5e_arfs_enable(struct mlx5e_priv *priv);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv);
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple);
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple);
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs);
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs);
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#else
-static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
-static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
-static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{ return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
#endif
#ifdef CONFIG_MLX5_EN_TLS
@@ -142,54 +122,63 @@ struct mlx5e_fs_udp;
struct mlx5e_fs_any;
struct mlx5e_ptp_fs;
-struct mlx5e_flow_steering {
- bool state_destroy;
- bool vlan_strip_disable;
- struct mlx5_core_dev *mdev;
- struct mlx5_flow_namespace *ns;
-#ifdef CONFIG_MLX5_EN_RXNFC
- struct mlx5e_ethtool_steering ethtool;
-#endif
- struct mlx5e_tc_table *tc;
- struct mlx5e_promisc_table promisc;
- struct mlx5e_vlan_table *vlan;
- struct mlx5e_l2_table l2;
- struct mlx5_ttc_table *ttc;
- struct mlx5_ttc_table *inner_ttc;
-#ifdef CONFIG_MLX5_EN_ARFS
- struct mlx5e_arfs_tables *arfs;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- struct mlx5e_accel_fs_tcp *accel_tcp;
-#endif
- struct mlx5e_fs_udp *udp;
- struct mlx5e_fs_any *any;
- struct mlx5e_ptp_fs *ptp_fs;
-};
-
-void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
struct ttc_params *ttc_params, bool tunnel);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res);
void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev);
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile);
struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
struct mlx5_core_dev *mdev,
bool state_destroy);
void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
-
-int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
-void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
-int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
-void mlx5e_remove_mac_trap(struct mlx5e_priv *priv);
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress);
+#ifdef CONFIG_MLX5_EN_RXNFC
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs);
+#endif
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner);
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner);
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs);
+#endif
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs);
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any);
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp);
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp);
+#endif
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy);
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable);
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs);
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs);
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs);
void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev);
int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
struct net_device *netdev,
@@ -198,5 +187,18 @@ int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
struct net_device *netdev,
__be16 proto, u16 vid);
void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+
+#define fs_err(fs, fmt, ...) \
+ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_dbg(fs, fmt, ...) \
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn(fs, fmt, ...) \
+ mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn_once(fs, fmt, ...) \
+ mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
#endif /* __MLX5E_FLOW_STEER_H__ */
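With this hunk, struct mlx5e_flow_steering becomes an opaque handle: its definition leaves the header, callers reach its state only through the new mlx5e_fs_get_*()/mlx5e_fs_set_*() accessors, and logging goes through the fs_err()/fs_dbg() wrappers around the underlying mdev. The same encapsulation pattern, reduced to a standalone sketch with generic names (this is not mlx5 code), looks like:

/* the header would expose only the forward declaration and the accessors */
#include <stdio.h>
#include <stdlib.h>

struct fs_ctx;				/* opaque to every other file */

/* the implementation file is the only place that knows the layout */
struct fs_ctx {
	void *udp_tables;
	int vlan_strip_disable;
};

static struct fs_ctx *fs_ctx_init(void)
{
	return calloc(1, sizeof(struct fs_ctx));
}

static void fs_ctx_set_udp(struct fs_ctx *fs, void *udp)
{
	fs->udp_tables = udp;
}

static void *fs_ctx_get_udp(struct fs_ctx *fs)
{
	return fs->udp_tables;
}

int main(void)
{
	struct fs_ctx *fs = fs_ctx_init();

	fs_ctx_set_udp(fs, "udp steering tables");
	printf("udp state: %s\n", (char *)fs_ctx_get_udp(fs));
	free(fs);
	return 0;
}

Hiding the layout is what lets the conversions further down (fs_tt_redirect.c, fs_tcp.c, en_arfs.c) take a struct mlx5e_flow_steering * instead of struct mlx5e_priv * without exposing the steering internals again.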
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
new file mode 100644
index 000000000000..9e276fd3c0cf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5E_FS_ETHTOOL_H__
+#define __MLX5E_FS_ETHTOOL_H__
+
+struct mlx5e_priv;
+struct mlx5e_ethtool_steering;
+#ifdef CONFIG_MLX5_EN_RXNFC
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{ return 0; }
+static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { }
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
+#endif
+#endif
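The new header keeps the usual config-gated stub convention: with CONFIG_MLX5_EN_RXNFC enabled the real prototypes are declared, otherwise static inline stubs keep callers compiling and report -EOPNOTSUPP where a result is expected. A generic, self-contained sketch of that convention (names invented for illustration):

#include <stdio.h>
#include <errno.h>

/* #define CONFIG_FEATURE_FOO 1 */	/* uncomment when the real code is built in */

#ifdef CONFIG_FEATURE_FOO
int foo_set_rule(int rule);		/* real implementation lives elsewhere */
#else
static inline int foo_set_rule(int rule)
{
	(void)rule;			/* unused in the stub */
	return -EOPNOTSUPP;		/* callers see "not supported" */
}
#endif

int main(void)
{
	printf("foo_set_rule() = %d\n", foo_set_rule(1));
	return 0;
}

The mlx5e_ethtool_alloc()/mlx5e_ethtool_free() pair is new here because the ethtool steering state, like the rest of the flow-steering state, is no longer embedded in a structure that other files can see.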
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index e153d6119e02..03cb79adf912 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
-#include <linux/netdevice.h>
#include "en/fs_tt_redirect.h"
#include "fs_core.h"
+#include "mlx5_core.h"
enum fs_udp_type {
FS_IPV4_UDP,
@@ -74,17 +74,17 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
}
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port)
{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
enum fs_udp_type type = tt2fs_udp(ttc_type);
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft = NULL;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_fs_udp *fs_udp;
int err;
if (type == FS_UDP_NUM_TYPES)
@@ -94,7 +94,6 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_udp = priv->fs->udp;
ft = fs_udp->tables[type].t;
fs_udp_set_dport_flow(spec, type, d_port);
@@ -106,31 +105,30 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n",
- __func__, fs_udp_type2str(type), err);
+ fs_err(fs, "%s: add %s rule failed, err %d\n",
+ __func__, fs_udp_type2str(type), err);
}
return rule;
}
-static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type)
+static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
struct mlx5e_flow_table *fs_udp_t;
struct mlx5_flow_destination dest;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
- struct mlx5e_fs_udp *fs_udp;
int err;
- fs_udp = priv->fs->udp;
fs_udp_t = &fs_udp->tables[type];
- dest = mlx5_ttc_get_default_dest(priv->fs->ttc, fs_udp2tt(type));
+ dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type));
rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, fs type=%d, err %d\n",
- __func__, type, err);
+ fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n",
+ __func__, type, err);
return err;
}
@@ -206,33 +204,36 @@ out:
return err;
}
-static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type)
+static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
{
- struct mlx5e_flow_table *ft = &priv->fs->udp->tables[type];
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
int err;
+ ft = &fs_udp->tables[type];
ft->num_groups = 0;
ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs->ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n",
- fs_udp_type2str(type), ft->t->id, ft->t->level);
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n",
+ fs_udp_type2str(type), ft->t->id, ft->t->level);
err = fs_udp_create_groups(ft, type);
if (err)
goto err;
- err = fs_udp_add_default_rule(priv, type);
+ err = fs_udp_add_default_rule(fs, type);
if (err)
goto err;
@@ -253,17 +254,17 @@ static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
fs_udp->tables[i].t = NULL;
}
-static int fs_udp_disable(struct mlx5e_priv *priv)
+static int fs_udp_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs->ttc, fs_udp2tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, fs_udp2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
return err;
}
}
@@ -271,30 +272,31 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
return 0;
}
-static int fs_udp_enable(struct mlx5e_priv *priv)
+static int fs_udp_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
- dest.ft = priv->fs->udp->tables[i].t;
+ dest.ft = udp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs->ttc, fs_udp2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, fs_udp2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
return err;
}
}
return 0;
}
-void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_fs_udp *fs_udp = priv->fs->udp;
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
int i;
if (!fs_udp)
@@ -303,48 +305,50 @@ void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
if (--fs_udp->ref_cnt)
return;
- fs_udp_disable(priv);
+ fs_udp_disable(fs);
for (i = 0; i < FS_UDP_NUM_TYPES; i++)
fs_udp_destroy_table(fs_udp, i);
kfree(fs_udp);
- priv->fs->udp = NULL;
+ mlx5e_fs_set_udp(fs, NULL);
}
-int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv)
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
int i, err;
- if (priv->fs->udp) {
- priv->fs->udp->ref_cnt++;
+ if (udp) {
+ udp->ref_cnt++;
return 0;
}
- priv->fs->udp = kzalloc(sizeof(*priv->fs->udp), GFP_KERNEL);
- if (!priv->fs->udp)
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
return -ENOMEM;
+ mlx5e_fs_set_udp(fs, udp);
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
- err = fs_udp_create_table(priv, i);
+ err = fs_udp_create_table(fs, i);
if (err)
goto err_destroy_tables;
}
- err = fs_udp_enable(priv);
+ err = fs_udp_enable(fs);
if (err)
goto err_destroy_tables;
- priv->fs->udp->ref_cnt = 1;
+ udp->ref_cnt = 1;
return 0;
err_destroy_tables:
while (--i >= 0)
- fs_udp_destroy_table(priv->fs->udp, i);
+ fs_udp_destroy_table(udp, i);
- kfree(priv->fs->udp);
- priv->fs->udp = NULL;
+ kfree(udp);
+ mlx5e_fs_set_udp(fs, NULL);
return err;
}
@@ -356,22 +360,21 @@ static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_typ
}
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
u32 tir_num, u16 ether_type)
{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft = NULL;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_fs_any *fs_any;
int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_any = priv->fs->any;
ft = fs_any->table.t;
fs_any_set_ethertype_flow(spec, ether_type);
@@ -383,31 +386,29 @@ mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n",
- __func__, err);
+ fs_err(fs, "%s: add ANY rule failed, err %d\n",
+ __func__, err);
}
return rule;
}
-static int fs_any_add_default_rule(struct mlx5e_priv *priv)
+static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
struct mlx5e_flow_table *fs_any_t;
struct mlx5_flow_destination dest;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
- struct mlx5e_fs_any *fs_any;
int err;
- fs_any = priv->fs->any;
fs_any_t = &fs_any->table;
-
- dest = mlx5_ttc_get_default_dest(priv->fs->ttc, MLX5_TT_ANY);
+ dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY);
rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, fs type=ANY, err %d\n",
- __func__, err);
+ fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n",
+ __func__, err);
return err;
}
@@ -472,9 +473,11 @@ err:
return err;
}
-static int fs_any_create_table(struct mlx5e_priv *priv)
+static int fs_any_create_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_flow_table *ft = &priv->fs->any->table;
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *ft = &fs_any->table;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -484,21 +487,21 @@ static int fs_any_create_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs->ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs ANY table id %u level %u\n",
- ft->t->id, ft->t->level);
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n",
+ ft->t->id, ft->t->level);
err = fs_any_create_groups(ft);
if (err)
goto err;
- err = fs_any_add_default_rule(priv);
+ err = fs_any_add_default_rule(fs);
if (err)
goto err;
@@ -509,35 +512,38 @@ err:
return err;
}
-static int fs_any_disable(struct mlx5e_priv *priv)
+static int fs_any_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err;
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs->ttc, MLX5_TT_ANY);
+ err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, MLX5_TT_ANY, err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
}
-static int fs_any_enable(struct mlx5e_priv *priv)
+static int fs_any_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs);
struct mlx5_flow_destination dest = {};
int err;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs->any->table.t;
+ dest.ft = any->table.t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs->ttc, MLX5_TT_ANY, &dest);
+ err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, MLX5_TT_ANY, err);
+ fs_err(fs,
+ "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
@@ -553,9 +559,9 @@ static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
fs_any->table.t = NULL;
}
-void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_fs_any *fs_any = priv->fs->any;
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
if (!fs_any)
return;
@@ -563,43 +569,45 @@ void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
if (--fs_any->ref_cnt)
return;
- fs_any_disable(priv);
+ fs_any_disable(fs);
fs_any_destroy_table(fs_any);
kfree(fs_any);
- priv->fs->any = NULL;
+ mlx5e_fs_set_any(fs, NULL);
}
-int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv)
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
int err;
- if (priv->fs->any) {
- priv->fs->any->ref_cnt++;
+ if (fs_any) {
+ fs_any->ref_cnt++;
return 0;
}
- priv->fs->any = kzalloc(sizeof(*priv->fs->any), GFP_KERNEL);
- if (!priv->fs->any)
+ fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL);
+ if (!fs_any)
return -ENOMEM;
+ mlx5e_fs_set_any(fs, fs_any);
- err = fs_any_create_table(priv);
+ err = fs_any_create_table(fs);
if (err)
return err;
- err = fs_any_enable(priv);
+ err = fs_any_enable(fs);
if (err)
goto err_destroy_table;
- priv->fs->any->ref_cnt = 1;
+ fs_any->ref_cnt = 1;
return 0;
err_destroy_table:
- fs_any_destroy_table(priv->fs->any);
+ fs_any_destroy_table(fs_any);
- kfree(priv->fs->any);
- priv->fs->any = NULL;
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
return err;
}
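Both redirect table sets above (UDP and ANY) keep the same refcounted lazy create/destroy idiom while switching from priv to the fs handle: create takes another reference if the tables already exist, otherwise allocates, publishes the pointer via mlx5e_fs_set_*(), builds the tables and sets ref_cnt to 1; destroy returns early until the last reference drops, then tears down and clears the pointer. A standalone sketch of that idiom with generic names (not driver code):

#include <stdio.h>
#include <stdlib.h>

struct redirect_tables {
	int ref_cnt;
};

static struct redirect_tables *tables;	/* one instance per steering context */

static int redirect_tables_create(void)
{
	if (tables) {			/* already built: just take a reference */
		tables->ref_cnt++;
		return 0;
	}

	tables = calloc(1, sizeof(*tables));
	if (!tables)
		return -1;		/* -ENOMEM in the kernel version */

	/* ... create flow tables here; on error free and reset to NULL ... */
	tables->ref_cnt = 1;
	return 0;
}

static void redirect_tables_destroy(void)
{
	if (!tables)
		return;
	if (--tables->ref_cnt)		/* other users (e.g. PTP rules) remain */
		return;

	/* ... tear down flow tables here ... */
	free(tables);
	tables = NULL;
}

int main(void)
{
	redirect_tables_create();	/* ref_cnt = 1 */
	redirect_tables_create();	/* ref_cnt = 2 */
	redirect_tables_destroy();	/* still alive */
	redirect_tables_destroy();	/* freed on the last put */
	printf("tables %s\n", tables ? "leaked" : "released");
	return 0;
}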
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
index 7a70c4f38fda..5780fd7ad507 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -4,23 +4,22 @@
#ifndef __MLX5E_FS_TT_REDIRECT_H__
#define __MLX5E_FS_TT_REDIRECT_H__
-#include "en.h"
#include "en/fs.h"
void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
/* UDP traffic type redirect */
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port);
-void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
-int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs);
/* ANY traffic type redirect*/
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
u32 tir_num, u16 ether_type);
-void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv);
-int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 903de88bab53..6fefce30d296 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -622,37 +622,39 @@ static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
}
-static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_ptp_fs *ptp_fs = priv->fs->ptp_fs;
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
if (!ptp_fs->valid)
return;
mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
- mlx5e_fs_tt_redirect_any_destroy(priv);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
- mlx5e_fs_tt_redirect_udp_destroy(priv);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
ptp_fs->valid = false;
}
static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
{
u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
- struct mlx5e_ptp_fs *ptp_fs = priv->fs->ptp_fs;
+ struct mlx5e_flow_steering *fs = priv->fs;
struct mlx5_flow_handle *rule;
+ struct mlx5e_ptp_fs *ptp_fs;
int err;
+ ptp_fs = mlx5e_fs_get_ptp(fs);
if (ptp_fs->valid)
return 0;
- err = mlx5e_fs_tt_redirect_udp_create(priv);
+ err = mlx5e_fs_tt_redirect_udp_create(fs);
if (err)
goto out_free;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -660,7 +662,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v4_rule = rule;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -668,11 +670,11 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v6_rule = rule;
- err = mlx5e_fs_tt_redirect_any_create(priv);
+ err = mlx5e_fs_tt_redirect_any_create(fs);
if (err)
goto out_destroy_udp_v6_rule;
- rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588);
+ rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
goto out_destroy_fs_any;
@@ -683,13 +685,13 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
return 0;
out_destroy_fs_any:
- mlx5e_fs_tt_redirect_any_destroy(priv);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
out_destroy_udp_v6_rule:
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
out_destroy_udp_v4_rule:
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
out_destroy_fs_udp:
- mlx5e_fs_tt_redirect_udp_destroy(priv);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
out_free:
return err;
}
@@ -797,29 +799,31 @@ int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
return 0;
}
-int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv)
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
{
struct mlx5e_ptp_fs *ptp_fs;
- if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
return 0;
ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
if (!ptp_fs)
return -ENOMEM;
+ mlx5e_fs_set_ptp(fs, ptp_fs);
- priv->fs->ptp_fs = ptp_fs;
return 0;
}
-void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv)
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
{
- struct mlx5e_ptp_fs *ptp_fs = priv->fs->ptp_fs;
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
- if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
return;
- mlx5e_ptp_rx_unset_fs(priv);
+ mlx5e_ptp_rx_unset_fs(fs);
kfree(ptp_fs);
}
@@ -845,6 +849,6 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
return -EINVAL;
}
- mlx5e_ptp_rx_unset_fs(priv);
+ mlx5e_ptp_rx_unset_fs(priv->fs);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 92dbbec472ec..5bce554e131a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -74,8 +74,10 @@ void mlx5e_ptp_close(struct mlx5e_ptp *c);
void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
-int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv);
-void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
index 69949ab830b6..25174f68613e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -12,6 +12,7 @@ validate_goto_chain(struct mlx5e_priv *priv,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
bool is_esw = mlx5e_is_eswitch_flow(flow);
bool ft_flow = mlx5e_is_ft_flow(flow);
u32 dest_chain = act->chain_index;
@@ -21,7 +22,7 @@ validate_goto_chain(struct mlx5e_priv *priv,
u32 max_chain;
esw = priv->mdev->priv.eswitch;
- chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(priv->fs->tc);
+ chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc);
max_chain = mlx5_chains_get_chain_range(chains);
reformat_and_fwd = is_esw ?
MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 11f2a7fb72a9..46c2e5f9c05c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -230,12 +230,12 @@ static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
switch (trap_id) {
case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
- err = mlx5e_add_vlan_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
if (err)
goto err_out;
break;
case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
- err = mlx5e_add_mac_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
if (err)
goto err_out;
break;
@@ -256,10 +256,10 @@ static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
{
switch (trap_id) {
case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
- mlx5e_remove_vlan_trap(priv);
+ mlx5e_remove_vlan_trap(priv->fs);
break;
case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
- mlx5e_remove_mac_trap(priv);
+ mlx5e_remove_mac_trap(priv->fs);
break;
default:
netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
index 20a4f1e585af..285d32d2fd08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
-#include <linux/netdevice.h>
+#include <mlx5_core.h>
#include "en_accel/fs_tcp.h"
#include "fs_core.h"
@@ -71,13 +71,13 @@ void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
struct sock *sk, u32 tirn,
uint32_t flow_tag)
{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
struct mlx5_flow_destination dest = {};
struct mlx5e_flow_table *ft = NULL;
- struct mlx5e_accel_fs_tcp *fs_tcp;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *flow;
struct mlx5_flow_spec *spec;
@@ -86,19 +86,17 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_tcp = priv->fs->accel_tcp;
-
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
switch (sk->sk_family) {
case AF_INET:
accel_fs_tcp_set_ipv4_flow(spec, sk);
ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP];
- mlx5e_dbg(HW, priv, "%s flow is %pI4:%d -> %pI4:%d\n", __func__,
- &inet_sk(sk)->inet_rcv_saddr,
- inet_sk(sk)->inet_sport,
- &inet_sk(sk)->inet_daddr,
- inet_sk(sk)->inet_dport);
+ fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__,
+ &inet_sk(sk)->inet_rcv_saddr,
+ inet_sk(sk)->inet_sport,
+ &inet_sk(sk)->inet_daddr,
+ inet_sk(sk)->inet_dport);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
@@ -140,34 +138,32 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1);
if (IS_ERR(flow))
- netdev_err(priv->netdev, "mlx5_add_flow_rules() failed, flow is %ld\n",
- PTR_ERR(flow));
+ fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow));
out:
kvfree(spec);
return flow;
}
-static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
+static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs,
enum accel_fs_tcp_type type)
{
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
struct mlx5e_flow_table *accel_fs_t;
struct mlx5_flow_destination dest;
- struct mlx5e_accel_fs_tcp *fs_tcp;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
int err = 0;
- fs_tcp = priv->fs->accel_tcp;
accel_fs_t = &fs_tcp->tables[type];
- dest = mlx5_ttc_get_default_dest(priv->fs->ttc, fs_accel2tt(type));
+ dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type));
rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, accel_fs type=%d, err %d\n",
- __func__, type, err);
+ fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n",
+ __func__, type, err);
return err;
}
@@ -265,9 +261,11 @@ out:
return err;
}
-static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_type type)
+static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type)
{
- struct mlx5e_flow_table *ft = &priv->fs->accel_tcp->tables[type];
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_flow_table *ft = &accel_tcp->tables[type];
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -277,21 +275,21 @@ static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_
ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs->ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs accel table id %u level %u\n",
- ft->t->id, ft->t->level);
+ fs_dbg(fs, "Created fs accel table id %u level %u\n",
+ ft->t->id, ft->t->level);
err = accel_fs_tcp_create_groups(ft, type);
if (err)
goto err;
- err = accel_fs_tcp_add_default_rule(priv, type);
+ err = accel_fs_tcp_add_default_rule(fs, type);
if (err)
goto err;
@@ -301,17 +299,18 @@ err:
return err;
}
-static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
+static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs->ttc, fs_accel2tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, fs_accel2tt(i), err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
return err;
}
}
@@ -319,32 +318,32 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
return 0;
}
-static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
+static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
- dest.ft = priv->fs->accel_tcp->tables[i].t;
+ dest.ft = accel_tcp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs->ttc, fs_accel2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, fs_accel2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_accel2tt(i), err);
return err;
}
}
return 0;
}
-static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i)
+static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i)
{
- struct mlx5e_accel_fs_tcp *fs_tcp;
+ struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs);
- fs_tcp = priv->fs->accel_tcp;
if (IS_ERR_OR_NULL(fs_tcp->tables[i].t))
return;
@@ -353,40 +352,43 @@ static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i)
fs_tcp->tables[i].t = NULL;
}
-void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv)
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs);
int i;
- if (!priv->fs->accel_tcp)
+ if (!accel_tcp)
return;
- accel_fs_tcp_disable(priv);
+ accel_fs_tcp_disable(fs);
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
- accel_fs_tcp_destroy_table(priv, i);
+ accel_fs_tcp_destroy_table(fs, i);
- kfree(priv->fs->accel_tcp);
- priv->fs->accel_tcp = NULL;
+ kfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
}
-int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv)
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_accel_fs_tcp *accel_tcp;
int i, err;
- if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version))
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version))
return -EOPNOTSUPP;
- priv->fs->accel_tcp = kzalloc(sizeof(*priv->fs->accel_tcp), GFP_KERNEL);
- if (!priv->fs->accel_tcp)
+ accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL);
+ if (!accel_tcp)
return -ENOMEM;
+ mlx5e_fs_set_accel_tcp(fs, accel_tcp);
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
- err = accel_fs_tcp_create_table(priv, i);
+ err = accel_fs_tcp_create_table(fs, i);
if (err)
goto err_destroy_tables;
}
- err = accel_fs_tcp_enable(priv);
+ err = accel_fs_tcp_enable(fs);
if (err)
goto err_destroy_tables;
@@ -394,9 +396,8 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv)
err_destroy_tables:
while (--i >= 0)
- accel_fs_tcp_destroy_table(priv, i);
-
- kfree(priv->fs->accel_tcp);
- priv->fs->accel_tcp = NULL;
+ accel_fs_tcp_destroy_table(fs, i);
+ kfree(accel_tcp);
+ mlx5e_fs_set_accel_tcp(fs, NULL);
return err;
}
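The converted mlx5e_accel_fs_tcp_create() keeps the standard partial-unwind error path: tables are created in a loop and, on failure, only the ones already created are destroyed by walking the index back down. A minimal standalone sketch of that pattern (generic names, outcome printed for clarity):

#include <stdio.h>

#define NUM_TABLES 4

static int create_table(int i)
{
	if (i == 2)			/* simulate a failure on the third table */
		return -1;
	printf("created table %d\n", i);
	return 0;
}

static void destroy_table(int i)
{
	printf("destroyed table %d\n", i);
}

static int create_all(void)
{
	int i, err;

	for (i = 0; i < NUM_TABLES; i++) {
		err = create_table(i);
		if (err)
			goto err_destroy_tables;
	}
	return 0;

err_destroy_tables:
	while (--i >= 0)		/* unwind only what was created */
		destroy_table(i);
	return err;
}

int main(void)
{
	return create_all() ? 1 : 0;
}

The same shape appears in fs_udp_create() above and in the aRFS table creation further down.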
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
index 589235824543..a032bff482a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
@@ -4,19 +4,19 @@
#ifndef __MLX5E_ACCEL_FS_TCP_H__
#define __MLX5E_ACCEL_FS_TCP_H__
-#include "en.h"
+#include "en/fs.h"
#ifdef CONFIG_MLX5_EN_TLS
-int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv);
-void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv);
-struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
+int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs);
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
struct sock *sk, u32 tirn,
uint32_t flow_tag);
void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule);
#else
-static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv) {}
-static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_priv *priv,
+static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; }
+static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {}
+static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs,
struct sock *sk, u32 tirn,
uint32_t flow_tag)
{ return ERR_PTR(-EOPNOTSUPP); }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index f8113fd23265..e776b9f2da06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -174,6 +174,8 @@ static void rx_destroy(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(priv->fs, false);
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
@@ -182,15 +184,14 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
accel_esp = priv->ipsec->rx_fs;
fs_prot = &accel_esp->fs_prot[type];
-
fs_prot->default_dest =
- mlx5_ttc_get_default_dest(priv->fs->ttc, fs_esp2tt(type));
+ mlx5_ttc_get_default_dest(ttc, fs_esp2tt(type));
ft_attr.max_fte = 1;
ft_attr.autogroup.max_num_groups = 1;
ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft = mlx5_create_auto_grouped_flow_table(priv->fs->ns, &ft_attr);
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
return PTR_ERR(ft);
@@ -205,7 +206,7 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
ft_attr.prio = MLX5E_NIC_PRIO;
ft_attr.autogroup.num_reserved_entries = 1;
ft_attr.autogroup.max_num_groups = 1;
- ft = mlx5_create_auto_grouped_flow_table(priv->fs->ns, &ft_attr);
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft;
@@ -230,6 +231,7 @@ err_add:
static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5_flow_destination dest = {};
struct mlx5e_accel_fs_esp *accel_esp;
@@ -249,7 +251,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
/* connect */
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = fs_prot->ft;
- mlx5_ttc_fwd_dest(priv->fs->ttc, fs_esp2tt(type), &dest);
+ mlx5_ttc_fwd_dest(ttc, fs_esp2tt(type), &dest);
skip:
fs_prot->refcnt++;
@@ -260,6 +262,7 @@ out:
static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(priv->fs, false);
struct mlx5e_accel_fs_esp_prot *fs_prot;
struct mlx5e_accel_fs_esp *accel_esp;
@@ -271,7 +274,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
goto out;
/* disconnect */
- mlx5_ttc_fwd_default_dest(priv->fs->ttc, fs_esp2tt(type));
+ mlx5_ttc_fwd_default_dest(ttc, fs_esp2tt(type));
/* remove FT */
rx_destroy(priv, type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index 30a70d139046..c0b77963cc7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -118,9 +118,9 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
if (enable)
- err = mlx5e_accel_fs_tcp_create(priv);
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
else
- mlx5e_accel_fs_tcp_destroy(priv);
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
mutex_unlock(&priv->state_lock);
return err;
@@ -138,7 +138,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
return -ENOMEM;
if (priv->netdev->features & NETIF_F_HW_TLS_RX) {
- err = mlx5e_accel_fs_tcp_create(priv);
+ err = mlx5e_accel_fs_tcp_create(priv->fs);
if (err) {
destroy_workqueue(priv->tls->rx_wq);
return err;
@@ -154,7 +154,7 @@ void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
return;
if (priv->netdev->features & NETIF_F_HW_TLS_RX)
- mlx5e_accel_fs_tcp_destroy(priv);
+ mlx5e_accel_fs_tcp_destroy(priv->fs);
destroy_workqueue(priv->tls->rx_wq);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 27483aa7be8a..13145ecaf839 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -111,7 +111,7 @@ static void accel_rule_handle_work(struct work_struct *work)
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
goto out;
- rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk,
+ rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk,
mlx5e_tir_get_tirn(&priv_rx->tir),
MLX5_FS_DEFAULT_FLOW_TAG);
if (!IS_ERR_OR_NULL(rule))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index cd7f245dcf14..0ae1865086ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -114,47 +114,49 @@ static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
}
}
-static int arfs_disable(struct mlx5e_priv *priv)
+static int arfs_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
/* Modify ttc rules destination back to their default */
- err = mlx5_ttc_fwd_default_dest(priv->fs->ttc, arfs_get_tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, arfs_get_tt(i), err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, arfs_get_tt(i), err);
return err;
}
}
return 0;
}
-static void arfs_del_rules(struct mlx5e_priv *priv);
+static void arfs_del_rules(struct mlx5e_flow_steering *fs);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv)
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
{
- arfs_del_rules(priv);
+ arfs_del_rules(fs);
- return arfs_disable(priv);
+ return arfs_disable(fs);
}
-int mlx5e_arfs_enable(struct mlx5e_priv *priv)
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- dest.ft = priv->fs->arfs->arfs_tables[i].ft.t;
+ dest.ft = arfs->arfs_tables[i].ft.t;
/* Modify ttc rules destination to point to the aRFS FTs */
- err = mlx5_ttc_fwd_dest(priv->fs->ttc, arfs_get_tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
- __func__, arfs_get_tt(i), err);
- arfs_disable(priv);
+ fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
+ __func__, arfs_get_tt(i), err);
+ arfs_disable(fs);
return err;
}
}
@@ -167,31 +169,37 @@ static void arfs_destroy_table(struct arfs_table *arfs_t)
mlx5e_destroy_flow_table(&arfs_t->ft);
}
-static void _mlx5e_cleanup_tables(struct mlx5e_priv *priv)
+static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
int i;
- arfs_del_rules(priv);
- destroy_workqueue(priv->fs->arfs->wq);
+ arfs_del_rules(fs);
+ destroy_workqueue(arfs->wq);
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- if (!IS_ERR_OR_NULL(priv->fs->arfs->arfs_tables[i].ft.t))
- arfs_destroy_table(&priv->fs->arfs->arfs_tables[i]);
+ if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t))
+ arfs_destroy_table(&arfs->arfs_tables[i]);
}
}
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple)
{
- if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+
+ if (!ntuple)
return;
- _mlx5e_cleanup_tables(priv);
- kvfree(priv->fs->arfs);
+ _mlx5e_cleanup_tables(fs);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
}
-static int arfs_add_default_rule(struct mlx5e_priv *priv,
+static int arfs_add_default_rule(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
enum arfs_type type)
{
- struct arfs_table *arfs_t = &priv->fs->arfs->arfs_tables[type];
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
+ struct arfs_table *arfs_t = &arfs->arfs_tables[type];
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
enum mlx5_traffic_types tt;
@@ -200,23 +208,21 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
tt = arfs_get_tt(type);
if (tt == -EINVAL) {
- netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
- __func__, type);
+ fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type);
return -EINVAL;
}
/* FIXME: Must use mlx5_ttc_get_default_dest(),
* but can't since TTC default is not setup yet !
*/
- dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
+ dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt);
arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
&flow_act,
&dest, 1);
if (IS_ERR(arfs_t->default_rule)) {
err = PTR_ERR(arfs_t->default_rule);
arfs_t->default_rule = NULL;
- netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
- __func__, type);
+ fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type);
}
return err;
@@ -318,10 +324,12 @@ out:
return err;
}
-static int arfs_create_table(struct mlx5e_priv *priv,
+static int arfs_create_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
enum arfs_type type)
{
- struct mlx5e_arfs_tables *arfs = priv->fs->arfs;
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -332,7 +340,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
ft_attr.level = MLX5E_ARFS_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs->ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
@@ -343,7 +351,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
if (err)
goto err;
- err = arfs_add_default_rule(priv, type);
+ err = arfs_add_default_rule(fs, rx_res, type);
if (err)
goto err;
@@ -353,35 +361,40 @@ err:
return err;
}
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
{
+ struct mlx5e_arfs_tables *arfs;
int err = -ENOMEM;
int i;
- if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ if (!ntuple)
return 0;
- priv->fs->arfs = kvzalloc(sizeof(*priv->fs->arfs), GFP_KERNEL);
- if (!priv->fs->arfs)
+ arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL);
+ if (!arfs)
return -ENOMEM;
- spin_lock_init(&priv->fs->arfs->arfs_lock);
- INIT_LIST_HEAD(&priv->fs->arfs->rules);
- priv->fs->arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
- if (!priv->fs->arfs->wq)
+ spin_lock_init(&arfs->arfs_lock);
+ INIT_LIST_HEAD(&arfs->rules);
+ arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
+ if (!arfs->wq)
goto err;
+ mlx5e_fs_set_arfs(fs, arfs);
+
for (i = 0; i < ARFS_NUM_TYPES; i++) {
- err = arfs_create_table(priv, i);
+ err = arfs_create_table(fs, rx_res, i);
if (err)
goto err_des;
}
return 0;
err_des:
- _mlx5e_cleanup_tables(priv);
+ _mlx5e_cleanup_tables(fs);
err:
- kvfree(priv->fs->arfs);
+ mlx5e_fs_set_arfs(fs, NULL);
+ kvfree(arfs);
return err;
}
@@ -389,6 +402,7 @@ err:
static void arfs_may_expire_flow(struct mlx5e_priv *priv)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_rule *arfs_rule;
struct hlist_node *htmp;
HLIST_HEAD(del_list);
@@ -396,8 +410,8 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
int i;
int j;
- spin_lock_bh(&priv->fs->arfs->arfs_lock);
- mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs->arfs->arfs_tables, i, j) {
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) {
if (!work_pending(&arfs_rule->arfs_work) &&
rps_may_expire_flow(priv->netdev,
arfs_rule->rxq, arfs_rule->flow_id,
@@ -408,7 +422,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
break;
}
}
- spin_unlock_bh(&priv->fs->arfs->arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
if (arfs_rule->rule)
mlx5_del_flow_rules(arfs_rule->rule);
@@ -417,20 +431,21 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
}
}
-static void arfs_del_rules(struct mlx5e_priv *priv)
+static void arfs_del_rules(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs);
struct hlist_node *htmp;
struct arfs_rule *rule;
HLIST_HEAD(del_list);
int i;
int j;
- spin_lock_bh(&priv->fs->arfs->arfs_lock);
- mlx5e_for_each_arfs_rule(rule, htmp, priv->fs->arfs->arfs_tables, i, j) {
+ spin_lock_bh(&arfs->arfs_lock);
+ mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
hlist_add_head(&rule->hlist, &del_list);
}
- spin_unlock_bh(&priv->fs->arfs->arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
cancel_work_sync(&rule->arfs_work);
@@ -474,7 +489,7 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
struct arfs_rule *arfs_rule)
{
- struct mlx5e_arfs_tables *arfs = priv->fs->arfs;
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_tuple *tuple = &arfs_rule->tuple;
struct mlx5_flow_handle *rule = NULL;
struct mlx5_flow_destination dest = {};
@@ -588,13 +603,15 @@ static void arfs_handle_work(struct work_struct *work)
struct arfs_rule,
arfs_work);
struct mlx5e_priv *priv = arfs_rule->priv;
+ struct mlx5e_arfs_tables *arfs;
struct mlx5_flow_handle *rule;
+ arfs = mlx5e_fs_get_arfs(priv->fs);
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- spin_lock_bh(&priv->fs->arfs->arfs_lock);
+ spin_lock_bh(&arfs->arfs_lock);
hlist_del(&arfs_rule->hlist);
- spin_unlock_bh(&priv->fs->arfs->arfs_lock);
+ spin_unlock_bh(&arfs->arfs_lock);
mutex_unlock(&priv->state_lock);
kfree(arfs_rule);
@@ -620,6 +637,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
const struct flow_keys *fk,
u16 rxq, u32 flow_id)
{
+ struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs);
struct arfs_rule *rule;
struct arfs_tuple *tuple;
@@ -647,7 +665,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
tuple->dst_port = fk->ports.dst;
rule->flow_id = flow_id;
- rule->filter_id = priv->fs->arfs->last_filter_id++ % RPS_NO_FILTER;
+ rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER;
hlist_add_head(&rule->hlist,
arfs_hash_bucket(arfs_t, tuple->src_port,
@@ -691,11 +709,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_arfs_tables *arfs = priv->fs->arfs;
- struct arfs_table *arfs_t;
+ struct mlx5e_arfs_tables *arfs;
struct arfs_rule *arfs_rule;
+ struct arfs_table *arfs_t;
struct flow_keys fk;
+ arfs = mlx5e_fs_get_arfs(priv->fs);
if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
return -EPROTONOSUPPORT;
@@ -725,7 +744,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
return -ENOMEM;
}
}
- queue_work(priv->fs->arfs->wq, &arfs_rule->arfs_work);
+ queue_work(arfs->wq, &arfs_rule->arfs_work);
spin_unlock_bh(&arfs->arfs_lock);
return arfs_rule->filter_id;
}
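
The aRFS rework above follows the same pattern: mlx5e_arfs_create_tables(), mlx5e_arfs_enable()/disable() and mlx5e_arfs_destroy_tables() now take the flow-steering object, an explicit mlx5e_rx_res and an "ntuple" flag instead of digging through mlx5e_priv. A minimal setup/teardown sketch under those signatures (not part of the patch; the example_* names are hypothetical, and "ntuple" mirrors netdev->hw_features & NETIF_F_NTUPLE):

static int example_arfs_setup(struct mlx5e_flow_steering *fs,
                              struct mlx5e_rx_res *rx_res, bool ntuple)
{
        int err;

        err = mlx5e_arfs_create_tables(fs, rx_res, ntuple);
        if (err || !ntuple)
                return err;

        /* Point the TTC rules at the freshly created aRFS tables. */
        err = mlx5e_arfs_enable(fs);
        if (err)
                mlx5e_arfs_destroy_tables(fs, ntuple);
        return err;
}

static void example_arfs_teardown(struct mlx5e_flow_steering *fs, bool ntuple)
{
        if (ntuple)
                mlx5e_arfs_disable(fs);  /* restore TTC default destinations */
        mlx5e_arfs_destroy_tables(fs, ntuple);
}
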
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index b811207fe5ed..e5befe5d34b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -38,6 +38,7 @@
#include "en/xsk/pool.h"
#include "en/ptp.h"
#include "lib/clock.h"
+#include "en/fs_ethtool.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
struct ethtool_drvinfo *drvinfo)
@@ -494,14 +495,14 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE);
if (arfs_enabled)
- mlx5e_arfs_disable(priv);
+ mlx5e_arfs_disable(priv->fs);
/* Switch to new channels, set new parameters and close old ones */
err = mlx5e_safe_switch_params(priv, &new_params,
mlx5e_num_channels_changed_ctx, NULL, true);
if (arfs_enabled) {
- int err2 = mlx5e_arfs_enable(priv);
+ int err2 = mlx5e_arfs_enable(priv->fs);
if (err2)
netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index e0ce5a233d0b..1892ccb889b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -36,10 +36,38 @@
#include <linux/tcp.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mpfs.h>
-#include "en.h"
#include "en_tc.h"
#include "lib/mpfs.h"
#include "en/ptp.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_flow_steering {
+ struct work_struct set_rx_mode_work;
+ bool state_destroy;
+ bool vlan_strip_disable;
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_namespace *egress_ns;
+#ifdef CONFIG_MLX5_EN_RXNFC
+ struct mlx5e_ethtool_steering *ethtool;
+#endif
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_promisc_table promisc;
+ struct mlx5e_vlan_table *vlan;
+ struct mlx5e_l2_table l2;
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_ttc_table *inner_ttc;
+#ifdef CONFIG_MLX5_EN_ARFS
+ struct mlx5e_arfs_tables *arfs;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_accel_fs_tcp *accel_tcp;
+#endif
+ struct mlx5e_fs_udp *udp;
+ struct mlx5e_fs_any *any;
+ struct mlx5e_ptp_fs *ptp_fs;
+};
static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_l2_rule *ai, int type);
@@ -148,9 +176,8 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs)
max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list);
if (list_size > max_list_size) {
- mlx5_core_warn(fs->mdev,
- "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
- list_size, max_list_size);
+ fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
list_size = max_list_size;
}
@@ -167,8 +194,8 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs)
err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size);
if (err)
- mlx5_core_err(fs->mdev, "Failed to modify vport vlans list err(%d)\n",
- err);
+ fs_err(fs, "Failed to modify vport vlans list err(%d)\n",
+ err);
kvfree(vlans);
return err;
@@ -249,7 +276,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs,
if (IS_ERR(*rule_p)) {
err = PTR_ERR(*rule_p);
*rule_p = NULL;
- mlx5_core_err(fs->mdev, "%s: add rule failed\n", __func__);
+ fs_err(fs, "%s: add rule failed\n", __func__);
}
return err;
@@ -351,78 +378,78 @@ mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num)
return rule;
}
-int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num)
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
{
- struct mlx5_flow_table *ft = priv->fs->vlan->ft.t;
+ struct mlx5_flow_table *ft = fs->vlan->ft.t;
struct mlx5_flow_handle *rule;
int err;
rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- priv->fs->vlan->trap_rule = NULL;
- mlx5_core_err(priv->fs->mdev, "%s: add VLAN trap rule failed, err %d\n",
- __func__, err);
+ fs->vlan->trap_rule = NULL;
+ fs_err(fs, "%s: add VLAN trap rule failed, err %d\n",
+ __func__, err);
return err;
}
- priv->fs->vlan->trap_rule = rule;
+ fs->vlan->trap_rule = rule;
return 0;
}
-void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv)
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs)
{
- if (priv->fs->vlan->trap_rule) {
- mlx5_del_flow_rules(priv->fs->vlan->trap_rule);
- priv->fs->vlan->trap_rule = NULL;
+ if (fs->vlan->trap_rule) {
+ mlx5_del_flow_rules(fs->vlan->trap_rule);
+ fs->vlan->trap_rule = NULL;
}
}
-int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num)
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num)
{
- struct mlx5_flow_table *ft = priv->fs->l2.ft.t;
+ struct mlx5_flow_table *ft = fs->l2.ft.t;
struct mlx5_flow_handle *rule;
int err;
rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- priv->fs->l2.trap_rule = NULL;
- mlx5_core_err(priv->fs->mdev, "%s: add MAC trap rule failed, err %d\n",
- __func__, err);
+ fs->l2.trap_rule = NULL;
+ fs_err(fs, "%s: add MAC trap rule failed, err %d\n",
+ __func__, err);
return err;
}
- priv->fs->l2.trap_rule = rule;
+ fs->l2.trap_rule = rule;
return 0;
}
-void mlx5e_remove_mac_trap(struct mlx5e_priv *priv)
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs)
{
- if (priv->fs->l2.trap_rule) {
- mlx5_del_flow_rules(priv->fs->l2.trap_rule);
- priv->fs->l2.trap_rule = NULL;
+ if (fs->l2.trap_rule) {
+ mlx5_del_flow_rules(fs->l2.trap_rule);
+ fs->l2.trap_rule = NULL;
}
}
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (!priv->fs->vlan->cvlan_filter_disabled)
+ if (!fs->vlan->cvlan_filter_disabled)
return;
- priv->fs->vlan->cvlan_filter_disabled = false;
- if (priv->netdev->flags & IFF_PROMISC)
+ fs->vlan->cvlan_filter_disabled = false;
+ if (promisc)
return;
- mlx5e_fs_del_vlan_rule(priv->fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
}
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc)
{
- if (priv->fs->vlan->cvlan_filter_disabled)
+ if (fs->vlan->cvlan_filter_disabled)
return;
- priv->fs->vlan->cvlan_filter_disabled = true;
- if (priv->netdev->flags & IFF_PROMISC)
+ fs->vlan->cvlan_filter_disabled = true;
+ if (promisc)
return;
- mlx5e_add_vlan_rule(priv->fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
}
static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid)
@@ -462,7 +489,7 @@ int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
{
if (!fs->vlan) {
- mlx5_core_err(fs->mdev, "Vlan doesn't exist\n");
+ fs_err(fs, "Vlan doesn't exist\n");
return -EINVAL;
}
@@ -479,7 +506,7 @@ int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
__be16 proto, u16 vid)
{
if (!fs->vlan) {
- mlx5_core_err(fs->mdev, "Vlan doesn't exist\n");
+ fs_err(fs, "Vlan doesn't exist\n");
return -EINVAL;
}
@@ -512,28 +539,28 @@ static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs)
mlx5e_fs_add_any_vid_rules(fs);
}
-static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs)
{
int i;
- mlx5e_fs_del_vlan_rule(priv->fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
- for_each_set_bit(i, priv->fs->vlan->active_cvlans, VLAN_N_VID) {
- mlx5e_fs_del_vlan_rule(priv->fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) {
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
}
- for_each_set_bit(i, priv->fs->vlan->active_svlans, VLAN_N_VID)
- mlx5e_fs_del_vlan_rule(priv->fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+ for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID)
+ mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
- WARN_ON_ONCE(priv->fs->state_destroy);
+ WARN_ON_ONCE(fs->state_destroy);
- mlx5e_remove_vlan_trap(priv);
+ mlx5e_remove_vlan_trap(fs);
/* must be called after DESTROY bit is set and
* set_rx_mode is called and flushed
*/
- if (priv->fs->vlan->cvlan_filter_disabled)
- mlx5e_fs_del_any_vid_rules(priv->fs);
+ if (fs->vlan->cvlan_filter_disabled)
+ mlx5e_fs_del_any_vid_rules(fs);
}
#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
@@ -568,8 +595,9 @@ static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs,
}
if (l2_err)
- mlx5_core_warn(fs->mdev, "MPFS, failed to %s mac %pM, err(%d)\n",
- action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err);
+ fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del",
+ mac_addr, l2_err);
}
static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs,
@@ -640,9 +668,8 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs,
size++;
if (size > max_size) {
- mlx5_core_warn(fs->mdev,
- "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
- is_uc ? "UC" : "MC", size, max_size);
+ fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
size = max_size;
}
@@ -658,9 +685,8 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs,
err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size);
out:
if (err)
- mlx5_core_err(fs->mdev,
- "Failed to modify vport %s list err(%d)\n",
- is_uc ? "UC" : "MC", err);
+ fs_err(fs, "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
kfree(addr_array);
}
@@ -730,7 +756,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs)
if (IS_ERR(*rule_p)) {
err = PTR_ERR(*rule_p);
*rule_p = NULL;
- mlx5_core_err(fs->mdev, "%s: add promiscuous rule failed\n", __func__);
+ fs_err(fs, "%s: add promiscuous rule failed\n", __func__);
}
kvfree(spec);
return err;
@@ -750,7 +776,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
- mlx5_core_err(fs->mdev, "fail to create promisc table err=%d\n", err);
+ fs_err(fs, "fail to create promisc table err=%d\n", err);
return err;
}
@@ -807,8 +833,8 @@ void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs,
if (err)
enable_promisc = false;
if (!fs->vlan_strip_disable && !err)
- mlx5_core_warn_once(fs->mdev,
- "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
+ fs_warn_once(fs,
+ "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
}
if (enable_allmulti)
mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI);
@@ -856,14 +882,15 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
ft->t = NULL;
}
-static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
struct ttc_params *ttc_params)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
int tt;
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->ns = mlx5_get_flow_namespace(priv->fs->mdev,
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
@@ -872,13 +899,14 @@ static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
ttc_params->dests[tt].tir_num =
tt == MLX5_TT_ANY ?
- mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
- mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss_inner(rx_res,
tt);
}
}
-void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
struct ttc_params *ttc_params, bool tunnel)
{
@@ -886,7 +914,7 @@ void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
int tt;
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->ns = mlx5_get_flow_namespace(priv->fs->mdev,
+ ttc_params->ns = mlx5_get_flow_namespace(fs->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
@@ -895,19 +923,19 @@ void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
ttc_params->dests[tt].tir_num =
tt == MLX5_TT_ANY ?
- mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
- mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
+ mlx5e_rx_res_get_tirn_direct(rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss(rx_res, tt);
}
ttc_params->inner_ttc = tunnel;
- if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->fs->mdev))
+ if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev))
return;
for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
ttc_params->tunnel_dests[tt].type =
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
ttc_params->tunnel_dests[tt].ft =
- mlx5_get_ttc_flow_table(priv->fs->inner_ttc);
+ mlx5_get_ttc_flow_table(fs->inner_ttc);
}
}
@@ -959,8 +987,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs,
ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(ai->rule)) {
- mlx5_core_err(fs->mdev, "%s: add l2 rule(mac:%pM) failed\n",
- __func__, mv_dmac);
+ fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac);
err = PTR_ERR(ai->rule);
ai->rule = NULL;
}
@@ -1044,14 +1071,14 @@ err_destroy_groups:
return err;
}
-static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs)
{
- mlx5e_destroy_flow_table(&priv->fs->l2.ft);
+ mlx5e_destroy_flow_table(&fs->l2.ft);
}
-static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
+static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_l2_table *l2_table = &priv->fs->l2;
+ struct mlx5e_l2_table *l2_table = &fs->l2;
struct mlx5e_flow_table *ft = &l2_table->ft;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -1062,7 +1089,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_L2_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs->ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
@@ -1221,126 +1248,128 @@ err_destroy_vlan_table:
return err;
}
-static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs)
{
- mlx5e_del_vlan_rules(priv);
- mlx5e_destroy_flow_table(&priv->fs->vlan->ft);
+ mlx5e_del_vlan_rules(fs);
+ mlx5e_destroy_flow_table(&fs->vlan->ft);
}
-static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs)
{
- if (!mlx5_tunnel_inner_ft_supported(priv->fs->mdev))
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
return;
- mlx5_destroy_ttc_table(priv->fs->inner_ttc);
+ mlx5_destroy_ttc_table(fs->inner_ttc);
}
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs)
{
- mlx5_destroy_ttc_table(priv->fs->ttc);
+ mlx5_destroy_ttc_table(fs->ttc);
}
-static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
+static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
{
struct ttc_params ttc_params = {};
- if (!mlx5_tunnel_inner_ft_supported(priv->fs->mdev))
+ if (!mlx5_tunnel_inner_ft_supported(fs->mdev))
return 0;
- mlx5e_set_inner_ttc_params(priv, &ttc_params);
- priv->fs->inner_ttc = mlx5_create_inner_ttc_table(priv->fs->mdev,
- &ttc_params);
- if (IS_ERR(priv->fs->inner_ttc))
- return PTR_ERR(priv->fs->inner_ttc);
+ mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params);
+ fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev,
+ &ttc_params);
+ if (IS_ERR(fs->inner_ttc))
+ return PTR_ERR(fs->inner_ttc);
return 0;
}
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res)
{
struct ttc_params ttc_params = {};
- mlx5e_set_ttc_params(priv, &ttc_params, true);
- priv->fs->ttc = mlx5_create_ttc_table(priv->fs->mdev, &ttc_params);
- if (IS_ERR(priv->fs->ttc))
- return PTR_ERR(priv->fs->ttc);
+ mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true);
+ fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params);
+ if (IS_ERR(fs->ttc))
+ return PTR_ERR(fs->ttc);
return 0;
}
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev)
{
+ struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
int err;
- priv->fs->ns = mlx5_get_flow_namespace(priv->fs->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
-
- if (!priv->fs->ns)
+ if (!ns)
return -EOPNOTSUPP;
- err = mlx5e_arfs_create_tables(priv);
+ mlx5e_fs_set_ns(fs, ns, false);
+ err = mlx5e_arfs_create_tables(fs, rx_res,
+ !!(netdev->hw_features & NETIF_F_NTUPLE));
if (err) {
- mlx5_core_err(priv->fs->mdev, "Failed to create arfs tables, err=%d\n",
- err);
- priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ fs_err(fs, "Failed to create arfs tables, err=%d\n", err);
+ netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- err = mlx5e_create_inner_ttc_table(priv);
+ err = mlx5e_create_inner_ttc_table(fs, rx_res);
if (err) {
- mlx5_core_err(priv->fs->mdev,
- "Failed to create inner ttc table, err=%d\n", err);
+ fs_err(fs, "Failed to create inner ttc table, err=%d\n", err);
goto err_destroy_arfs_tables;
}
- err = mlx5e_create_ttc_table(priv);
+ err = mlx5e_create_ttc_table(fs, rx_res);
if (err) {
- mlx5_core_err(priv->fs->mdev, "Failed to create ttc table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create ttc table, err=%d\n", err);
goto err_destroy_inner_ttc_table;
}
- err = mlx5e_create_l2_table(priv);
+ err = mlx5e_create_l2_table(fs);
if (err) {
- mlx5_core_err(priv->fs->mdev, "Failed to create l2 table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create l2 table, err=%d\n", err);
goto err_destroy_ttc_table;
}
- err = mlx5e_fs_create_vlan_table(priv->fs);
+ err = mlx5e_fs_create_vlan_table(fs);
if (err) {
- mlx5_core_err(priv->fs->mdev, "Failed to create vlan table, err=%d\n",
- err);
+ fs_err(fs, "Failed to create vlan table, err=%d\n", err);
goto err_destroy_l2_table;
}
- err = mlx5e_ptp_alloc_rx_fs(priv);
+ err = mlx5e_ptp_alloc_rx_fs(fs, profile);
if (err)
goto err_destory_vlan_table;
- mlx5e_ethtool_init_steering(priv);
+ mlx5e_ethtool_init_steering(fs);
return 0;
err_destory_vlan_table:
- mlx5e_destroy_vlan_table(priv);
+ mlx5e_destroy_vlan_table(fs);
err_destroy_l2_table:
- mlx5e_destroy_l2_table(priv);
+ mlx5e_destroy_l2_table(fs);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv);
+ mlx5e_destroy_ttc_table(fs);
err_destroy_inner_ttc_table:
- mlx5e_destroy_inner_ttc_table(priv);
+ mlx5e_destroy_inner_ttc_table(fs);
err_destroy_arfs_tables:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE));
return err;
}
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile)
{
- mlx5e_ptp_free_rx_fs(priv);
- mlx5e_destroy_vlan_table(priv);
- mlx5e_destroy_l2_table(priv);
- mlx5e_destroy_ttc_table(priv);
- mlx5e_destroy_inner_ttc_table(priv);
- mlx5e_arfs_destroy_tables(priv);
- mlx5e_ethtool_cleanup_steering(priv);
+ mlx5e_ptp_free_rx_fs(fs, profile);
+ mlx5e_destroy_vlan_table(fs);
+ mlx5e_destroy_l2_table(fs);
+ mlx5e_destroy_ttc_table(fs);
+ mlx5e_destroy_inner_ttc_table(fs);
+ mlx5e_arfs_destroy_tables(fs, ntuple);
+ mlx5e_ethtool_cleanup_steering(fs);
}
static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs)
@@ -1356,6 +1385,11 @@ static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs)
kvfree(fs->vlan);
}
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs)
+{
+ return fs->vlan;
+}
+
static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs)
{
fs->tc = mlx5e_tc_table_alloc();
@@ -1369,6 +1403,32 @@ static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs)
mlx5e_tc_table_free(fs->tc);
}
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs)
+{
+ return fs->tc;
+}
+
+#ifdef CONFIG_MLX5_EN_RXNFC
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{
+ return mlx5e_ethtool_alloc(&fs->ethtool);
+}
+
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs)
+{
+ mlx5e_ethtool_free(fs->ethtool);
+}
+
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs)
+{
+ return fs->ethtool;
+}
+#else
+static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs)
+{ return 0; }
+static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { }
+#endif
+
struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
struct mlx5_core_dev *mdev,
bool state_destroy)
@@ -1394,8 +1454,13 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
goto err_free_vlan;
}
- return fs;
+ err = mlx5e_fs_ethtool_alloc(fs);
+ if (err)
+ goto err_free_tc;
+ return fs;
+err_free_tc:
+ mlx5e_fs_tc_free(fs);
err_free_vlan:
mlx5e_fs_vlan_free(fs);
err_free_fs:
@@ -1406,7 +1471,109 @@ err:
void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs)
{
+ mlx5e_fs_ethtool_free(fs);
mlx5e_fs_tc_free(fs);
mlx5e_fs_vlan_free(fs);
kvfree(fs);
}
+
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs)
+{
+ return &fs->l2;
+}
+
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress)
+{
+ return egress ? fs->egress_ns : fs->ns;
+}
+
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress)
+{
+ if (!egress)
+ fs->ns = ns;
+ else
+ fs->egress_ns = ns;
+}
+
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner)
+{
+ return inner ? fs->inner_ttc : fs->ttc;
+}
+
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner)
+{
+ if (!inner)
+ fs->ttc = ttc;
+ else
+ fs->inner_ttc = ttc;
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs)
+{
+ return fs->arfs;
+}
+
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs)
+{
+ fs->arfs = arfs;
+}
+#endif
+
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs)
+{
+ return fs->ptp_fs;
+}
+
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs)
+{
+ fs->ptp_fs = ptp_fs;
+}
+
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs)
+{
+ return fs->any;
+}
+
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any)
+{
+ fs->any = any;
+}
+
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs)
+{
+ return fs->accel_tcp;
+}
+
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp)
+{
+ fs->accel_tcp = accel_tcp;
+}
+#endif
+
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy)
+{
+ fs->state_destroy = state_destroy;
+}
+
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs,
+ bool vlan_strip_disable)
+{
+ fs->vlan_strip_disable = vlan_strip_disable;
+}
+
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs)
+{
+ return fs->udp;
+}
+
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp)
+{
+ fs->udp = udp;
+}
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs)
+{
+ return fs->mdev;
+}
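
The block of getters and setters above is the crux of the series: struct mlx5e_flow_steering becomes private to en_fs.c, and every other file reaches its namespace, TTC tables, aRFS/PTP/UDP/TLS state and flags through accessors. A minimal sketch of how an external caller is expected to use them (not part of the patch; example_fs_bringup() is hypothetical):

static int example_fs_bringup(struct mlx5e_flow_steering *fs)
{
        struct mlx5_core_dev *mdev = mlx5e_fs_get_mdev(fs);
        struct mlx5_flow_namespace *ns;

        ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL);
        if (!ns)
                return -EOPNOTSUPP;

        mlx5e_fs_set_ns(fs, ns, false);         /* RX namespace */
        mlx5e_fs_set_state_destroy(fs, false);
        mlx5e_fs_set_vlan_strip_disable(fs, false);

        /* Read side: the outer TTC table is created by
         * mlx5e_create_ttc_table() and fetched via the getter.
         */
        if (!mlx5e_fs_get_ttc(fs, false))
                return -ENOENT;

        return 0;
}
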
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 3e4bc7836ef4..2a67798cd446 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -34,6 +34,22 @@
#include "en.h"
#include "en/params.h"
#include "en/xsk/pool.h"
+#include "en/fs_ethtool.h"
+
+struct mlx5e_ethtool_table {
+ struct mlx5_flow_table *ft;
+ int num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+ struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+ struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
+ struct list_head rules;
+ int tot_num_rules;
+};
static int flow_type_to_traffic_type(u32 flow_type);
@@ -66,6 +82,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
struct ethtool_rx_flow_spec *fs,
int num_tuples)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_ethtool_table *eth_ft;
struct mlx5_flow_namespace *ns;
@@ -81,18 +98,18 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
case UDP_V6_FLOW:
max_tuples = ETHTOOL_NUM_L3_L4_FTS;
prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
- eth_ft = &priv->fs->ethtool.l3_l4_ft[prio];
+ eth_ft = &ethtool->l3_l4_ft[prio];
break;
case IP_USER_FLOW:
case IPV6_USER_FLOW:
max_tuples = ETHTOOL_NUM_L3_L4_FTS;
prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
- eth_ft = &priv->fs->ethtool.l3_l4_ft[prio];
+ eth_ft = &ethtool->l3_l4_ft[prio];
break;
case ETHER_FLOW:
max_tuples = ETHTOOL_NUM_L2_FTS;
prio = max_tuples - num_tuples;
- eth_ft = &priv->fs->ethtool.l2_ft[prio];
+ eth_ft = &ethtool->l2_ft[prio];
prio += MLX5E_ETHTOOL_L2_PRIO;
break;
default:
@@ -382,15 +399,16 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
static void add_rule_to_list(struct mlx5e_priv *priv,
struct mlx5e_ethtool_rule *rule)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
+ struct list_head *head = &ethtool->rules;
struct mlx5e_ethtool_rule *iter;
- struct list_head *head = &priv->fs->ethtool.rules;
- list_for_each_entry(iter, &priv->fs->ethtool.rules, list) {
+ list_for_each_entry(iter, &ethtool->rules, list) {
if (iter->flow_spec.location > rule->flow_spec.location)
break;
head = &iter->list;
}
- priv->fs->ethtool.tot_num_rules++;
+ ethtool->tot_num_rules++;
list_add(&rule->list, head);
}
@@ -499,15 +517,16 @@ free:
return err ? ERR_PTR(err) : rule;
}
-static void del_ethtool_rule(struct mlx5e_priv *priv,
+static void del_ethtool_rule(struct mlx5e_flow_steering *fs,
struct mlx5e_ethtool_rule *eth_rule)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
if (eth_rule->rule)
mlx5_del_flow_rules(eth_rule->rule);
if (eth_rule->rss)
mlx5e_rss_refcnt_dec(eth_rule->rss);
list_del(&eth_rule->list);
- priv->fs->ethtool.tot_num_rules--;
+ ethtool->tot_num_rules--;
put_flow_table(eth_rule->eth_ft);
kfree(eth_rule);
}
@@ -515,9 +534,10 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
int location)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5e_ethtool_rule *iter;
- list_for_each_entry(iter, &priv->fs->ethtool.rules, list) {
+ list_for_each_entry(iter, &ethtool->rules, list) {
if (iter->flow_spec.location == location)
return iter;
}
@@ -531,7 +551,7 @@ static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
eth_rule = find_ethtool_rule(priv, location);
if (eth_rule)
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
if (!eth_rule)
@@ -754,7 +774,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
return 0;
del_ethtool_rule:
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
return err;
}
@@ -774,7 +794,7 @@ mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location)
goto out;
}
- del_ethtool_rule(priv, eth_rule);
+ del_ethtool_rule(priv->fs, eth_rule);
out:
return err;
}
@@ -783,12 +803,13 @@ static int
mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
struct ethtool_rxnfc *info, int location)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
struct mlx5e_ethtool_rule *eth_rule;
if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
return -EINVAL;
- list_for_each_entry(eth_rule, &priv->fs->ethtool.rules, list) {
+ list_for_each_entry(eth_rule, &ethtool->rules, list) {
int index;
if (eth_rule->flow_spec.location != location)
@@ -826,18 +847,34 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
return err;
}
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{
+ *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL);
+ if (!*ethtool)
+ return -ENOMEM;
+ return 0;
+}
+
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool)
{
+ kvfree(ethtool);
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs)
+{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
struct mlx5e_ethtool_rule *iter;
struct mlx5e_ethtool_rule *temp;
- list_for_each_entry_safe(iter, temp, &priv->fs->ethtool.rules, list)
- del_ethtool_rule(priv, iter);
+ list_for_each_entry_safe(iter, temp, &ethtool->rules, list)
+ del_ethtool_rule(fs, iter);
}
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs)
{
- INIT_LIST_HEAD(&priv->fs->ethtool.rules);
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs);
+
+ INIT_LIST_HEAD(&ethtool->rules);
}
static int flow_type_to_traffic_type(u32 flow_type)
@@ -959,11 +996,12 @@ int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
struct ethtool_rxnfc *info, u32 *rule_locs)
{
+ struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs);
int err = 0;
switch (info->cmd) {
case ETHTOOL_GRXCLSRLCNT:
- info->rule_cnt = priv->fs->ethtool.tot_num_rules;
+ info->rule_cnt = ethtool->tot_num_rules;
break;
case ETHTOOL_GRXCLSRULE:
err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
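
The ethtool steering state gets the same treatment: struct mlx5e_ethtool_steering moves into en_fs_ethtool.c, is allocated with mlx5e_ethtool_alloc() from mlx5e_fs_init(), and the init/cleanup entry points take the flow-steering object. A compressed lifecycle sketch (not part of the patch; in the driver these calls are split between fs init/cleanup and flow-steering create/destroy):

static void example_ethtool_steering_lifecycle(struct mlx5e_flow_steering *fs)
{
        mlx5e_ethtool_init_steering(fs);        /* start with an empty rule list */

        /* ... ethtool_rxnfc rules are added and removed here ... */

        mlx5e_ethtool_cleanup_steering(fs);     /* flush any remaining rules */
}
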
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 02eb2f0fa2ae..7c1a13738a58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2738,7 +2738,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
netif_tx_start_all_queues(priv->netdev);
if (mlx5e_is_vport_rep(priv))
- mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_activate_channels(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2752,7 +2752,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
mlx5e_rx_res_channels_deactivate(priv->rx_res);
if (mlx5e_is_vport_rep(priv))
- mlx5e_remove_sqs_fwd_rules(priv);
+ mlx5e_rep_deactivate_channels(priv);
/* The results of ndo_select_queue are unreliable, while netdev config
* is being changed (real_num_tx_queues, num_tc). Stop all queues to
@@ -3669,9 +3669,11 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
struct mlx5e_priv *priv = netdev_priv(netdev);
if (enable)
- mlx5e_enable_cvlan_filter(priv);
+ mlx5e_enable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
else
- mlx5e_disable_cvlan_filter(priv);
+ mlx5e_disable_cvlan_filter(priv->fs,
+ !!(priv->netdev->flags & IFF_PROMISC));
return 0;
}
@@ -3780,7 +3782,7 @@ static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
- priv->fs->vlan_strip_disable = !enable;
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable);
priv->channels.params.vlan_strip_disable = !enable;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
@@ -3788,7 +3790,7 @@ static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
if (err) {
- priv->fs->vlan_strip_disable = enable;
+ mlx5e_fs_set_vlan_strip_disable(priv->fs, enable);
priv->channels.params.vlan_strip_disable = enable;
}
unlock:
@@ -3826,9 +3828,9 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
int err;
if (enable)
- err = mlx5e_arfs_enable(priv);
+ err = mlx5e_arfs_enable(priv->fs);
else
- err = mlx5e_arfs_disable(priv);
+ err = mlx5e_arfs_disable(priv->fs);
return err;
}
@@ -3912,12 +3914,14 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
netdev_features_t features)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_vlan_table *vlan;
struct mlx5e_params *params;
+ vlan = mlx5e_fs_get_vlan(priv->fs);
mutex_lock(&priv->state_lock);
params = &priv->channels.params;
- if (!priv->fs->vlan ||
- !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs->vlan), VLAN_N_VID)) {
+ if (!vlan ||
+ !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) {
/* HW strips the outer C-tag header, this is a problem
* for S-tag traffic.
*/
@@ -5097,7 +5101,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_flow_steering(priv);
+ err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile,
+ priv->netdev);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
goto err_destroy_rx_res;
@@ -5120,7 +5125,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
err_tc_nic_cleanup:
mlx5e_tc_nic_cleanup(priv);
err_destroy_flow_steering:
- mlx5e_destroy_flow_steering(priv);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
err_destroy_rx_res:
mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
@@ -5136,7 +5142,8 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
{
mlx5e_accel_cleanup_rx(priv);
mlx5e_tc_nic_cleanup(priv);
- mlx5e_destroy_flow_steering(priv);
+ mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
+ priv->profile);
mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
@@ -5512,7 +5519,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (priv->fs)
- priv->fs->state_destroy = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
/* max number of channels may have changed */
max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
@@ -5573,7 +5581,8 @@ out:
mlx5e_reset_channels(priv->netdev);
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (priv->fs)
- priv->fs->state_destroy = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
cancel_work_sync(&priv->update_stats_work);
return err;
}
@@ -5584,7 +5593,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (priv->fs)
- priv->fs->state_destroy = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ mlx5e_fs_set_state_destroy(priv->fs,
+ !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
if (profile->disable)
profile->disable(priv);
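
In en_main.c the netdev-facing state is now passed in explicitly: the cvlan-filter helpers take the promiscuous flag as a parameter, and state_destroy/vlan_strip_disable are written through setters, so en_fs.c never reads netdev flags itself. A minimal sketch of the new calling convention (not part of the patch; example_set_cvlan_filter() is hypothetical):

static void example_set_cvlan_filter(struct mlx5e_priv *priv, bool enable)
{
        bool promisc = !!(priv->netdev->flags & IFF_PROMISC);

        if (enable)
                mlx5e_enable_cvlan_filter(priv->fs, promisc);
        else
                mlx5e_disable_cvlan_filter(priv->fs, promisc);
}
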
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 759f7d3c2cfd..914bddbfc1d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -56,6 +56,7 @@
#include "en_accel/ipsec.h"
#include "en/tc/int_port.h"
#include "en/ptp.h"
+#include "en/fs_ethtool.h"
#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
@@ -397,7 +398,8 @@ out_err:
return err;
}
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+static int
+mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
{
int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -451,7 +453,8 @@ out:
return err;
}
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+static void
+mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -460,6 +463,53 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
mlx5e_sqs2vport_stop(esw, rep);
}
+static int
+mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_group *g;
+ int err = 0;
+
+ g = esw->fdb_table.offloads.send_to_vport_meta_grp;
+ if (!g)
+ return 0;
+
+ flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ }
+
+ rpriv->send_to_vport_meta_rule = flow_rule;
+
+out:
+ return err;
+}
+
+static void
+mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (rpriv->send_to_vport_meta_rule)
+ mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule);
+}
+
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_add_sqs_fwd_rules(priv);
+ mlx5e_rep_add_meta_tunnel_rule(priv);
+}
+
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_rep_del_meta_tunnel_rule(priv);
+ mlx5e_remove_sqs_fwd_rules(priv);
+}
+
static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -747,19 +797,20 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
struct ttc_params ttc_params = {};
int err;
- priv->fs->ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
+ mlx5e_fs_set_ns(priv->fs,
+ mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL), false);
/* The inner_ttc in the ttc params is intentionally not set */
- mlx5e_set_ttc_params(priv, &ttc_params, false);
+ mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false);
if (rep->vport != MLX5_VPORT_UPLINK)
/* To give uplink rep TTC a lower level for chaining from root ft */
ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
- priv->fs->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
- if (IS_ERR(priv->fs->ttc)) {
- err = PTR_ERR(priv->fs->ttc);
+ mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false);
+ if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) {
+ err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false));
netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
err);
return err;
@@ -779,7 +830,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
/* non uplink reps will skip any bypass tables and go directly to
* their own ttc
*/
- rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs->ttc);
+ rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false));
return 0;
}
@@ -887,14 +938,14 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_root_ft;
- mlx5e_ethtool_init_steering(priv);
+ mlx5e_ethtool_init_steering(priv->fs);
return 0;
err_destroy_root_ft:
mlx5e_destroy_rep_root_ft(priv);
err_destroy_ttc_table:
- mlx5_destroy_ttc_table(priv->fs->ttc);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
err_destroy_rx_res:
mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
@@ -908,10 +959,10 @@ err_free_fs:
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- mlx5e_ethtool_cleanup_steering(priv);
+ mlx5e_ethtool_cleanup_steering(priv->fs);
rep_vport_rx_rule_destroy(priv);
mlx5e_destroy_rep_root_ft(priv);
- mlx5_destroy_ttc_table(priv->fs->ttc);
+ mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false));
mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_rx_res_free(priv->rx_res);
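
The representor changes bundle the SQ forwarding rules and the new send-to-vport meta rule behind a single activate/deactivate pair, so the meta rule's lifetime tracks the channels automatically. A trivial usage sketch (not part of the patch; example_rep_set_channels_state() is hypothetical):

static void example_rep_set_channels_state(struct mlx5e_priv *priv, bool up)
{
        if (up)
                mlx5e_rep_activate_channels(priv);
        else
                mlx5e_rep_deactivate_channels(priv);
}
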
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index dec183ccd4ac..b4e691760da9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -111,6 +111,7 @@ struct mlx5e_rep_priv {
struct list_head vport_sqs_list;
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
struct rtnl_link_stats64 prev_vf_vport_stats;
+ struct mlx5_flow_handle *send_to_vport_meta_rule;
struct rhashtable tc_ht;
};
@@ -241,8 +242,8 @@ int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
void *sp);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_rep_activate_channels(struct mlx5e_priv *priv);
+void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
@@ -256,8 +257,8 @@ static inline bool mlx5e_eswitch_rep(const struct net_device *netdev)
#else /* CONFIG_MLX5_ESWITCH */
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
-static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {}
static inline int mlx5e_rep_init(void) { return 0; };
static inline void mlx5e_rep_cleanup(void) {};
static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f154bda668ad..0872a214d2a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -311,6 +311,7 @@ mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
@@ -322,7 +323,7 @@ get_ct_priv(struct mlx5e_priv *priv)
return uplink_priv->ct_priv;
}
- return priv->fs->tc->ct;
+ return tc->ct;
}
static struct mlx5e_tc_psample *
@@ -345,6 +346,7 @@ get_sample_priv(struct mlx5e_priv *priv)
static struct mlx5e_post_act *
get_post_action(struct mlx5e_priv *priv)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
@@ -356,7 +358,7 @@ get_post_action(struct mlx5e_priv *priv)
return uplink_priv->post_act;
}
- return priv->fs->tc->post_act;
+ return tc->post_act;
}
struct mlx5_flow_handle *
@@ -607,11 +609,12 @@ int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
&esw->offloads.mod_hdr :
- &priv->fs->tc->mod_hdr;
+ &tc->mod_hdr;
}
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
@@ -810,6 +813,7 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
struct mlx5e_priv *priv = hp->func_priv;
struct ttc_params ttc_params;
+ struct mlx5_ttc_table *ttc;
int err;
err = mlx5e_hairpin_create_indirect_rqt(hp);
@@ -827,9 +831,10 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
goto err_create_ttc_table;
}
+ ttc = mlx5e_fs_get_ttc(priv->fs, false);
netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
hp->num_channels,
- mlx5_get_ttc_flow_table(priv->fs->ttc)->id);
+ mlx5_get_ttc_flow_table(ttc)->id);
return 0;
@@ -916,10 +921,11 @@ static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
u16 peer_vhca_id, u8 prio)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5e_hairpin_entry *hpe;
u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
- hash_for_each_possible(priv->fs->tc->hairpin_tbl, hpe,
+ hash_for_each_possible(tc->hairpin_tbl, hpe,
hairpin_hlist, hash_key) {
if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
refcount_inc(&hpe->refcnt);
@@ -933,11 +939,12 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
struct mlx5e_hairpin_entry *hpe)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
/* no more hairpin flows for us, release the hairpin pair */
- if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs->tc->hairpin_tbl_lock))
+ if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
return;
hash_del(&hpe->hairpin_hlist);
- mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
if (!IS_ERR_OR_NULL(hpe->hp)) {
netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
@@ -993,6 +1000,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
int peer_ifindex = parse_attr->mirred_ifindex[0];
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
@@ -1021,10 +1029,10 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
if (err)
return err;
- mutex_lock(&priv->fs->tc->hairpin_tbl_lock);
+ mutex_lock(&tc->hairpin_tbl_lock);
hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
if (hpe) {
- mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
wait_for_completion(&hpe->res_ready);
if (IS_ERR(hpe->hp)) {
@@ -1036,7 +1044,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
if (!hpe) {
- mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
return -ENOMEM;
}
@@ -1048,9 +1056,9 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
refcount_set(&hpe->refcnt, 1);
init_completion(&hpe->res_ready);
- hash_add(priv->fs->tc->hairpin_tbl, &hpe->hairpin_hlist,
+ hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
hash_hairpin_info(peer_id, match_prio));
- mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
params.log_data_size = 16;
params.log_data_size = min_t(u8, params.log_data_size,
@@ -1126,8 +1134,9 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
struct mlx5_flow_context *flow_context = &spec->flow_context;
+ struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
- struct mlx5e_tc_table *tc = priv->fs->tc;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_fs_chains *nic_chains;
struct mlx5_flow_act flow_act = {
@@ -1163,7 +1172,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
if (IS_ERR(dest[dest_ix].ft))
return ERR_CAST(dest[dest_ix].ft);
} else {
- dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs->vlan);
+ dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
}
dest_ix++;
}
@@ -1191,7 +1200,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
mutex_unlock(&tc->t_lock);
netdev_err(priv->netdev,
"Failed to create tc offload table\n");
- rule = ERR_CAST(priv->fs->tc->t);
+ rule = ERR_CAST(tc->t);
goto err_ft_get;
}
}
@@ -1293,8 +1302,10 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
struct mlx5_flow_handle *rule,
struct mlx5_flow_attr *attr)
{
- struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv->fs->tc);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_fs_chains *nic_chains;
+ nic_chains = mlx5e_nic_chains(tc);
mlx5_del_flow_rules(rule);
if (attr->chain || attr->prio)
@@ -1309,8 +1320,8 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_flow_attr *attr = flow->attr;
- struct mlx5e_tc_table *tc = priv->fs->tc;
flow_flag_clear(flow, OFFLOADED);
@@ -1322,13 +1333,13 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
/* Remove root table if no rules are left to avoid
* extra steering hops.
*/
- mutex_lock(&priv->fs->tc->t_lock);
+ mutex_lock(&tc->t_lock);
if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
!IS_ERR_OR_NULL(tc->t)) {
mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
- priv->fs->tc->t = NULL;
+ tc->t = NULL;
}
- mutex_unlock(&priv->fs->tc->t_lock);
+ mutex_unlock(&tc->t_lock);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
@@ -1494,8 +1505,11 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
route_priv = netdev_priv(route_dev);
route_mdev = route_priv->mdev;
- if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
- route_mdev->coredev_type != MLX5_COREDEV_VF)
+ if (out_mdev->coredev_type != MLX5_COREDEV_PF)
+ return false;
+
+ if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
+ route_mdev->coredev_type != MLX5_COREDEV_SF)
return false;
return mlx5e_same_hw_devs(out_priv, route_priv);
@@ -4058,13 +4072,14 @@ static const struct rhashtable_params tc_ht_params = {
static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
unsigned long flags)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5e_rep_priv *rpriv;
if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
rpriv = priv->ppriv;
return &rpriv->tc_ht;
} else /* NIC offload */
- return &priv->fs->tc->ht;
+ return &tc->ht;
}
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
@@ -4772,6 +4787,7 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
struct mlx5e_priv *peer_priv)
{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
struct mlx5e_hairpin_entry *hpe, *tmp;
LIST_HEAD(init_wait_list);
@@ -4783,11 +4799,11 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- mutex_lock(&priv->fs->tc->hairpin_tbl_lock);
- hash_for_each(priv->fs->tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
+ mutex_lock(&tc->hairpin_tbl_lock);
+ hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
if (refcount_inc_not_zero(&hpe->refcnt))
list_add(&hpe->dead_peer_wait_list, &init_wait_list);
- mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
+ mutex_unlock(&tc->hairpin_tbl_lock);
list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
wait_for_completion(&hpe->res_ready);
@@ -4841,7 +4857,8 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
- struct mlx5_flow_table **ft = &priv->fs->tc->miss_t;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct mlx5_flow_table **ft = &tc->miss_t;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns;
int err = 0;
@@ -4863,12 +4880,14 @@ static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
{
- mlx5_destroy_flow_table(priv->fs->tc->miss_t);
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+
+ mlx5_destroy_flow_table(tc->miss_t);
}
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
- struct mlx5e_tc_table *tc = priv->fs->tc;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
struct mlx5_core_dev *dev = priv->mdev;
struct mapping_ctx *chains_mapping;
struct mlx5_chains_attr attr = {};
@@ -4909,7 +4928,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
- attr.default_ft = priv->fs->tc->miss_t;
+ attr.default_ft = tc->miss_t;
attr.mapping = chains_mapping;
tc->chains = mlx5_chains_create(dev, &attr);
@@ -4958,7 +4977,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
- struct mlx5e_tc_table *tc = priv->fs->tc;
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier_dev_net(priv->netdev,
@@ -5163,13 +5182,13 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 chain = 0, chain_tag, reg_b, zone_restore_id;
struct mlx5e_priv *priv = netdev_priv(skb->dev);
- struct mlx5e_tc_table *tc = priv->fs->tc;
struct mlx5_mapped_obj mapped_obj;
struct tc_skb_ext *tc_skb_ext;
+ struct mlx5e_tc_table *tc;
int err;
reg_b = be32_to_cpu(cqe->ft_metadata);
-
+ tc = mlx5e_fs_get_tc(priv->fs);
chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
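
The tail of mlx5e_tc_update_skb() above now fetches the TC table through the accessor before using it: reg_b is read from the CQE, its low bits are masked off as a chain tag, and the tag is resolved through tc->mapping. Below is a minimal standalone sketch of that extraction; the mask value is an assumed example, not the driver's real constant.

#include <stdint.h>
#include <stdio.h>

/* Assumed mask value for illustration only; the real
 * MLX5E_TC_TABLE_CHAIN_TAG_MASK is defined by the driver headers.
 */
#define EXAMPLE_CHAIN_TAG_MASK 0xFFFFu

int main(void)
{
	uint32_t reg_b = 0x012a0007u;	/* metadata word taken from the CQE */
	uint32_t chain_tag = reg_b & EXAMPLE_CHAIN_TAG_MASK;

	/* The driver then resolves chain_tag to a chain id via
	 * mapping_find(tc->mapping, chain_tag, &mapped_obj).
	 */
	printf("chain_tag = 0x%x\n", chain_tag);
	return 0;
}
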
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 6ce1ab6b86b7..48241317a535 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -54,6 +54,7 @@
ESW_FLOW_ATTR_SZ :\
NIC_FLOW_ATTR_SZ)
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
struct mlx5e_tc_update_priv {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index 0abef71cb839..c9a91158e99c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -78,12 +78,16 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
struct mlx5_core_dev *dest_mdev)
{
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ bool vf_sf_vport;
+
+ vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
+ mlx5_esw_is_sf_vport(esw, vport_num);
/* Use indirect table for all IP traffic from UL to VF with vport
* destination when source rewrite flag is set.
*/
return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
- mlx5_eswitch_is_vf_vport(esw, vport_num) &&
+ vf_sf_vport &&
esw->dev == dest_mdev &&
attr->ip_version &&
attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6aa58044b949..c59107fa9e6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1360,7 +1360,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
if (esw->mode == MLX5_ESWITCH_OFFLOADS) {
struct devlink *devlink = priv_to_devlink(esw->dev);
- esw_offloads_del_send_to_vport_meta_rules(esw);
devl_rate_nodes_destroy(devlink);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 87ce5a208cb5..f68dc2d0dbe6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -244,6 +244,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
+ struct mlx5_flow_group *vport_rx_drop_group;
+ struct mlx5_flow_handle *vport_rx_drop_rule;
struct xarray vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
@@ -344,7 +346,10 @@ void esw_offloads_disable(struct mlx5_eswitch *esw);
int esw_offloads_enable(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
-void esw_offloads_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num);
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule);
bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a9f4c652f859..c98c6af21581 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -70,6 +70,8 @@
#define MLX5_ESW_VPORT_TBL_SIZE 128
#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4
+#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+
static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
@@ -1058,52 +1060,23 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
-{
- struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
- int i = 0, num_vfs = esw->esw_funcs.num_vfs;
-
- if (!num_vfs || !flows)
- return;
-
- for (i = 0; i < num_vfs; i++)
- mlx5_del_flow_rules(flows[i]);
-
- kvfree(flows);
- /* If changing eswitch mode from switchdev to legacy, but num_vfs is not 0,
- * meta rules could be freed again. So set it to NULL.
- */
- esw->fdb_table.offloads.send_to_vport_meta_rules = NULL;
-}
-
-void esw_offloads_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule)
{
- mlx5_eswitch_del_send_to_vport_meta_rules(esw);
+ if (rule)
+ mlx5_del_flow_rules(rule);
}
-static int
-mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num)
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {0};
- int num_vfs, rule_idx = 0, err = 0;
struct mlx5_flow_handle *flow_rule;
- struct mlx5_flow_handle **flows;
struct mlx5_flow_spec *spec;
- struct mlx5_vport *vport;
- unsigned long i;
- u16 vport_num;
-
- num_vfs = esw->esw_funcs.num_vfs;
- flows = kvcalloc(num_vfs, sizeof(*flows), GFP_KERNEL);
- if (!flows)
- return -ENOMEM;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec) {
- err = -ENOMEM;
- goto alloc_err;
- }
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
MLX5_SET(fte_match_param, spec->match_criteria,
misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
@@ -1116,34 +1089,18 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
- vport_num = vport->vport;
- MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
- dest.vport.num = vport_num;
-
- flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
- spec, &flow_act, &dest, 1);
- if (IS_ERR(flow_rule)) {
- err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule idx %d, err %ld\n",
- rule_idx, PTR_ERR(flow_rule));
- goto rule_err;
- }
- flows[rule_idx++] = flow_rule;
- }
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
+ dest.vport.num = vport_num;
- esw->fdb_table.offloads.send_to_vport_meta_rules = flows;
- kvfree(spec);
- return 0;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n",
+ vport_num, PTR_ERR(flow_rule));
-rule_err:
- while (--rule_idx >= 0)
- mlx5_del_flow_rules(flows[rule_idx]);
kvfree(spec);
-alloc_err:
- kvfree(flows);
- return err;
+ return flow_rule;
}
static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
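
With the refactor above, the per-VF bulk setup is gone: each caller now adds a single send-to-vport meta rule for its vport and later frees it by handle. A hedged sketch of the expected calling pattern follows; the wrapper function names are invented for illustration.

/* Illustrative only; assumes <linux/err.h> and the eswitch.h declarations
 * shown in this patch. The example_* wrappers are hypothetical.
 */
static int example_vport_meta_rule_add(struct mlx5_eswitch *esw, u16 vport_num,
				       struct mlx5_flow_handle **meta_rule)
{
	struct mlx5_flow_handle *rule;

	rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, vport_num);
	if (IS_ERR(rule))
		return PTR_ERR(rule);

	*meta_rule = rule;
	return 0;
}

static void example_vport_meta_rule_del(struct mlx5_flow_handle *meta_rule)
{
	/* The delete helper tolerates a NULL handle. */
	mlx5_eswitch_del_send_to_vport_meta_rule(meta_rule);
}
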
@@ -1668,18 +1625,200 @@ esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
#endif
+static int
+esw_create_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int count, err = 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
+
+ /* See comment at table_size calculation */
+ count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1);
+ *ix += count;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.offloads.send_to_vport_grp = g;
+
+out:
+ return err;
+}
+
+static int
+esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!esw_src_port_rewrite_supported(esw))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ MLX5_SET(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ end_flow_index, *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev,
+ "Failed to create send-to-vport meta flow group err(%d)\n", err);
+ goto send_vport_meta_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_meta_grp = g;
+
+ return 0;
+
+send_vport_meta_err:
+ return err;
+}
+
+static int
+esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return 0;
+
+ memset(flow_group_in, 0, inlen);
+
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ *ix + esw->total_vports - 1);
+ *ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
+
+out:
+ return err;
+}
+
+static int
+esw_create_miss_group(struct mlx5_eswitch *esw,
+ struct mlx5_flow_table *fdb,
+ u32 *flow_group_in,
+ int *ix)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int err = 0;
+ u8 *dmac;
+
+ memset(flow_group_in, 0, inlen);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ *ix + MLX5_ESW_MISS_FLOWS);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err);
+ goto miss_err;
+ }
+ esw->fdb_table.offloads.miss_grp = g;
+
+ err = esw_add_fdb_miss_rule(esw);
+ if (err)
+ goto miss_rule_err;
+
+ return 0;
+
+miss_rule_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+miss_err:
+ return err;
+}
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
- int num_vfs, table_size, ix, err = 0;
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
+ int table_size, ix = 0, err = 0;
u32 flags = 0, *flow_group_in;
- struct mlx5_flow_group *g;
- void *match_criteria;
- u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
@@ -1713,7 +1852,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
* total vports of the peer (currently it also uses esw->total_vports).
*/
table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
- MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
+ esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS;
/* create the slow path fdb with encap set, so further table instances
* can be created at run time while VFs are probed if the FW allows that.
@@ -1754,139 +1893,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
goto fdb_chains_err;
}
- /* create send-to-vport group */
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
- }
-
- /* See comment above table_size calculation */
- ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
-
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
goto send_vport_err;
- }
- esw->fdb_table.offloads.send_to_vport_grp = g;
-
- if (esw_src_port_rewrite_supported(esw)) {
- /* meta send to vport */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS_2);
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
- MLX5_SET(fte_match_param, match_criteria,
- misc_parameters_2.metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_mask());
- MLX5_SET(fte_match_param, match_criteria,
- misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
-
- num_vfs = esw->esw_funcs.num_vfs;
- if (num_vfs) {
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in,
- end_flow_index, ix + num_vfs - 1);
- ix += num_vfs;
-
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n",
- err);
- goto send_vport_meta_err;
- }
- esw->fdb_table.offloads.send_to_vport_meta_grp = g;
-
- err = mlx5_eswitch_add_send_to_vport_meta_rules(esw);
- if (err)
- goto meta_rule_err;
- }
- }
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
- /* create peer esw miss group */
- memset(flow_group_in, 0, inlen);
-
- esw_set_flow_group_source_port(esw, flow_group_in);
-
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- match_criteria = MLX5_ADDR_OF(create_flow_group_in,
- flow_group_in,
- match_criteria);
-
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
-
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
- }
-
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + esw->total_vports - 1);
- ix += esw->total_vports;
-
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
- goto peer_miss_err;
- }
- esw->fdb_table.offloads.peer_miss_grp = g;
- }
-
- /* create miss group */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_OUTER_HEADERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
- match_criteria);
- dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
- outer_headers.dmac_47_16);
- dmac[0] = 0x01;
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
- ix + MLX5_ESW_MISS_FLOWS);
+ err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto send_vport_meta_err;
- g = mlx5_create_flow_group(fdb, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
- goto miss_err;
- }
- esw->fdb_table.offloads.miss_grp = g;
+ err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix);
+ if (err)
+ goto peer_miss_err;
- err = esw_add_fdb_miss_rule(esw);
+ err = esw_create_miss_group(esw, fdb, flow_group_in, &ix);
if (err)
- goto miss_rule_err;
+ goto miss_err;
kvfree(flow_group_in);
return 0;
-miss_rule_err:
- mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
peer_miss_err:
- mlx5_eswitch_del_send_to_vport_meta_rules(esw);
-meta_rule_err:
if (esw->fdb_table.offloads.send_to_vport_meta_grp)
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
send_vport_meta_err:
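
The four helpers above now advance one running index (ix) through the FDB, and the new table_size expression matches that layout: the send-to-vport SQ group, one meta send-to-vport entry per vport, one peer-miss entry per vport, and finally the miss flows. A standalone sketch of how the ranges add up, with made-up example values for the device capabilities:

#include <stdio.h>

/* Example capability values only; real ones come from the device. */
#define MAX_PORTS	2	/* MLX5_MAX_PORTS */
#define TOTAL_VPORTS	10	/* esw->total_vports */
#define MAX_SQ_NVPORTS	32
#define MAX_PF_SQ	256
#define MISS_FLOWS	2	/* MLX5_ESW_MISS_FLOWS, value assumed */

int main(void)
{
	int ix = 0;

	ix += MAX_PORTS * (TOTAL_VPORTS * MAX_SQ_NVPORTS + MAX_PF_SQ); /* send-to-vport group */
	ix += TOTAL_VPORTS;	/* meta send-to-vport group: one entry per vport */
	ix += TOTAL_VPORTS;	/* peer eswitch miss group: one entry per vport */

	printf("miss group starts at index %d, table_size = %d\n",
	       ix,
	       MAX_PORTS * (TOTAL_VPORTS * MAX_SQ_NVPORTS + MAX_PF_SQ) +
	       TOTAL_VPORTS * 2 + MISS_FLOWS);
	return 0;
}
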
@@ -1913,7 +1942,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
- mlx5_eswitch_del_send_to_vport_meta_rules(esw);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
if (esw->fdb_table.offloads.send_to_vport_meta_grp)
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
@@ -1931,7 +1959,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
atomic64_set(&esw->user_count, 0);
}
-static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw)
+static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw)
{
int nvports;
@@ -1956,7 +1984,8 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
return -EOPNOTSUPP;
}
- ft_attr.max_fte = esw_get_offloads_ft_size(esw);
+ ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) +
+ MLX5_ESW_FT_OFFLOADS_DROP_RULE;
ft_attr.prio = 1;
ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
@@ -1985,7 +2014,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
int nvports;
int err = 0;
- nvports = esw_get_offloads_ft_size(esw);
+ nvports = esw_get_nr_ft_offloads_steering_src_ports(esw);
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
@@ -2015,6 +2044,52 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
}
+static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw)
+{
+ /* ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+ * for the drop rule, which is placed at the end of the table.
+ * So return the total number of vport and int_port entries as the rule index.
+ */
+ return esw_get_nr_ft_offloads_steering_src_ports(esw);
+}
+
+static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ u32 *flow_group_in;
+ int flow_index;
+ int err = 0;
+
+ flow_index = esw_create_vport_rx_drop_rule_index(esw);
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_drop_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_group)
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group);
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
struct mlx5_flow_destination *dest)
@@ -2063,6 +2138,32 @@ out:
return flow_rule;
}
+static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL,
+ &flow_act, NULL, 0);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "fs offloads: Failed to add vport rx drop rule err %ld\n",
+ PTR_ERR(flow_rule));
+ return PTR_ERR(flow_rule);
+ }
+
+ esw->offloads.vport_rx_drop_rule = flow_rule;
+
+ return 0;
+}
+
+static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw)
+{
+ if (esw->offloads.vport_rx_drop_rule)
+ mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule);
+}
+
static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
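
The two hunks above place the new drop rule in the single extra FTE reserved when the ft_offloads table is created: its index equals the number of steering source ports, i.e. the last entry of the enlarged table. A tiny standalone illustration, using an assumed port count:

#include <stdio.h>

#define FT_OFFLOADS_DROP_RULE	1	/* MLX5_ESW_FT_OFFLOADS_DROP_RULE */

int main(void)
{
	int nr_steering_src_ports = 18;	/* assumed example value */
	int max_fte = nr_steering_src_ports + FT_OFFLOADS_DROP_RULE;
	int drop_index = nr_steering_src_ports; /* last flow table entry */

	printf("max_fte=%d, rx rules use [0..%d], drop rule at index %d\n",
	       max_fte, nr_steering_src_ports - 1, drop_index);
	return 0;
}
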
@@ -3063,8 +3164,20 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
if (err)
goto create_fg_err;
+ err = esw_create_vport_rx_drop_group(esw);
+ if (err)
+ goto create_rx_drop_fg_err;
+
+ err = esw_create_vport_rx_drop_rule(esw);
+ if (err)
+ goto create_rx_drop_rule_err;
+
return 0;
+create_rx_drop_rule_err:
+ esw_destroy_vport_rx_drop_group(esw);
+create_rx_drop_fg_err:
+ esw_destroy_vport_rx_group(esw);
create_fg_err:
esw_destroy_offloads_fdb_tables(esw);
create_fdb_err:
@@ -3082,6 +3195,8 @@ create_indir_err:
static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
{
+ esw_destroy_vport_rx_drop_rule(esw);
+ esw_destroy_vport_rx_drop_group(esw);
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_fdb_tables(esw);
esw_destroy_restore_table(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index ac3757beaea2..645214e4302b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -32,6 +32,7 @@
#include "en.h"
#include "ipoib.h"
+#include "en/fs_ethtool.h"
static void mlx5i_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index c02b7b08fb4c..35f797cfd21e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -35,6 +35,7 @@
#include "en.h"
#include "en/params.h"
#include "ipoib.h"
+#include "en/fs_ethtool.h"
#define IB_DEFAULT_Q_KEY 0xb1b
#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
@@ -320,43 +321,47 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
{
+ struct mlx5_flow_namespace *ns =
+ mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
int err;
- priv->fs->ns = mlx5_get_flow_namespace(priv->mdev,
- MLX5_FLOW_NAMESPACE_KERNEL);
- if (!priv->fs->ns)
+ if (!ns)
return -EINVAL;
- err = mlx5e_arfs_create_tables(priv);
+ mlx5e_fs_set_ns(priv->fs, ns, false);
+ err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
if (err) {
netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
err);
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- err = mlx5e_create_ttc_table(priv);
+ err = mlx5e_create_ttc_table(priv->fs, priv->rx_res);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
goto err_destroy_arfs_tables;
}
- mlx5e_ethtool_init_steering(priv);
+ mlx5e_ethtool_init_steering(priv->fs);
return 0;
err_destroy_arfs_tables:
- mlx5e_arfs_destroy_tables(priv);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
return err;
}
static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
{
- mlx5e_destroy_ttc_table(priv);
- mlx5e_arfs_destroy_tables(priv);
- mlx5e_ethtool_cleanup_steering(priv);
+ mlx5e_destroy_ttc_table(priv->fs);
+ mlx5e_arfs_destroy_tables(priv->fs,
+ !!(priv->netdev->hw_features & NETIF_F_NTUPLE));
+ mlx5e_ethtool_cleanup_steering(priv->fs);
}
static int mlx5i_init_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 75553eb2c7f2..afbe046b35a0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -70,6 +70,8 @@ struct mlxsw_core {
struct workqueue_struct *emad_wq;
struct list_head rx_listener_list;
struct list_head event_listener_list;
+ struct list_head irq_event_handler_list;
+ struct mutex irq_event_handler_lock; /* Locks access to handlers list */
struct {
atomic64_t tid;
struct list_head trans_list;
@@ -1305,21 +1307,6 @@ mlxsw_devlink_sb_pool_set(struct devlink *devlink,
extack);
}
-static int mlxsw_devlink_port_type_set(struct devlink_port *devlink_port,
- enum devlink_port_type port_type)
-{
- struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
- struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
- struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
-
- if (!mlxsw_driver->port_type_set)
- return -EOPNOTSUPP;
-
- return mlxsw_driver->port_type_set(mlxsw_core,
- mlxsw_core_port->local_port,
- port_type);
-}
-
static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold)
@@ -1650,7 +1637,6 @@ static const struct devlink_ops mlxsw_devlink_ops = {
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
.reload_down = mlxsw_devlink_core_bus_device_reload_down,
.reload_up = mlxsw_devlink_core_bus_device_reload_up,
- .port_type_set = mlxsw_devlink_port_type_set,
.port_split = mlxsw_devlink_port_split,
.port_unsplit = mlxsw_devlink_port_unsplit,
.sb_pool_get = mlxsw_devlink_sb_pool_get,
@@ -2090,6 +2076,18 @@ static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core)
devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
}
+static void mlxsw_core_irq_event_handler_init(struct mlxsw_core *mlxsw_core)
+{
+ INIT_LIST_HEAD(&mlxsw_core->irq_event_handler_list);
+ mutex_init(&mlxsw_core->irq_event_handler_lock);
+}
+
+static void mlxsw_core_irq_event_handler_fini(struct mlxsw_core *mlxsw_core)
+{
+ mutex_destroy(&mlxsw_core->irq_event_handler_lock);
+ WARN_ON(!list_empty(&mlxsw_core->irq_event_handler_list));
+}
+
static int
__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
const struct mlxsw_bus *mlxsw_bus,
@@ -2125,6 +2123,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
mlxsw_core->bus = mlxsw_bus;
mlxsw_core->bus_priv = bus_priv;
mlxsw_core->bus_info = mlxsw_bus_info;
+ mlxsw_core_irq_event_handler_init(mlxsw_core);
err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
&mlxsw_core->res);
@@ -2233,6 +2232,7 @@ err_ports_init:
err_register_resources:
mlxsw_bus->fini(bus_priv);
err_bus_init:
+ mlxsw_core_irq_event_handler_fini(mlxsw_core);
if (!reload) {
devl_unlock(devlink);
devlink_free(devlink);
@@ -2302,6 +2302,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (!reload)
devl_resources_unregister(devlink);
mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+ mlxsw_core_irq_event_handler_fini(mlxsw_core);
if (!reload) {
devl_unlock(devlink);
devlink_free(devlink);
@@ -2772,6 +2773,57 @@ int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list)
}
EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait);
+struct mlxsw_core_irq_event_handler_item {
+ struct list_head list;
+ void (*cb)(struct mlxsw_core *mlxsw_core);
+};
+
+int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb)
+{
+ struct mlxsw_core_irq_event_handler_item *item;
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+ item->cb = cb;
+ mutex_lock(&mlxsw_core->irq_event_handler_lock);
+ list_add_tail(&item->list, &mlxsw_core->irq_event_handler_list);
+ mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handler_register);
+
+void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb)
+{
+ struct mlxsw_core_irq_event_handler_item *item, *tmp;
+
+ mutex_lock(&mlxsw_core->irq_event_handler_lock);
+ list_for_each_entry_safe(item, tmp,
+ &mlxsw_core->irq_event_handler_list, list) {
+ if (item->cb == cb) {
+ list_del(&item->list);
+ kfree(item);
+ }
+ }
+ mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handler_unregister);
+
+void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_core_irq_event_handler_item *item;
+
+ mutex_lock(&mlxsw_core->irq_event_handler_lock);
+ list_for_each_entry(item, &mlxsw_core->irq_event_handler_list, list) {
+ if (item->cb)
+ item->cb(mlxsw_core);
+ }
+ mutex_unlock(&mlxsw_core->irq_event_handler_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_irq_event_handlers_call);
+
static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg,
char *payload,
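
The three exported helpers above let core sub-modules subscribe to interrupt events raised by the underlying bus: a callback is registered per sub-module, and the bus calls mlxsw_core_irq_event_handlers_call() from process context (the I2C bus does so from a work item). A hedged usage sketch with invented sub-module names; only the helpers declared in core.h are assumed to exist.

static void example_submodule_irq_event_handler(struct mlxsw_core *mlxsw_core)
{
	/* Re-read hardware state that may have changed behind the IRQ. */
}

static int example_submodule_init(struct mlxsw_core *mlxsw_core)
{
	return mlxsw_core_irq_event_handler_register(mlxsw_core,
						     example_submodule_irq_event_handler);
}

static void example_submodule_fini(struct mlxsw_core *mlxsw_core)
{
	mlxsw_core_irq_event_handler_unregister(mlxsw_core,
						example_submodule_irq_event_handler);
}
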
@@ -3115,18 +3167,6 @@ void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u16 local_port,
}
EXPORT_SYMBOL(mlxsw_core_port_eth_set);
-void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u16 local_port,
- void *port_driver_priv)
-{
- struct mlxsw_core_port *mlxsw_core_port =
- &mlxsw_core->ports[local_port];
- struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
-
- mlxsw_core_port->port_driver_priv = port_driver_priv;
- devlink_port_type_ib_set(devlink_port, NULL);
-}
-EXPORT_SYMBOL(mlxsw_core_port_ib_set);
-
void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u16 local_port,
void *port_driver_priv)
{
@@ -3139,18 +3179,6 @@ void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u16 local_port,
}
EXPORT_SYMBOL(mlxsw_core_port_clear);
-enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
- u16 local_port)
-{
- struct mlxsw_core_port *mlxsw_core_port =
- &mlxsw_core->ports[local_port];
- struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
-
- return devlink_port->type;
-}
-EXPORT_SYMBOL(mlxsw_core_port_type_get);
-
-
struct devlink_port *
mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
u16 local_port)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 02d9cc2ef0c8..9de9fa24f27c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -215,6 +215,14 @@ int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list);
+typedef void mlxsw_irq_event_cb_t(struct mlxsw_core *mlxsw_core);
+
+int mlxsw_core_irq_event_handler_register(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb);
+void mlxsw_core_irq_event_handler_unregister(struct mlxsw_core *mlxsw_core,
+ mlxsw_irq_event_cb_t cb);
+void mlxsw_core_irq_event_handlers_call(struct mlxsw_core *mlxsw_core);
+
int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload);
int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
@@ -256,12 +264,8 @@ int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core,
void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core);
void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u16 local_port,
void *port_driver_priv, struct net_device *dev);
-void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u16 local_port,
- void *port_driver_priv);
void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u16 local_port,
void *port_driver_priv);
-enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
- u16 local_port);
struct devlink_port *
mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
u16 local_port);
@@ -341,8 +345,6 @@ struct mlxsw_driver {
const struct mlxsw_bus_info *mlxsw_bus_info,
struct netlink_ext_ack *extack);
void (*fini)(struct mlxsw_core *mlxsw_core);
- int (*port_type_set)(struct mlxsw_core *mlxsw_core, u16 local_port,
- enum devlink_port_type new_type);
int (*port_split)(struct mlxsw_core *mlxsw_core, u16 local_port,
unsigned int count, struct netlink_ext_ack *extack);
int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u16 local_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index ca59f0b946da..83d2dc91ba2c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -785,6 +785,21 @@ static int mlxsw_linecard_status_get_and_process(struct mlxsw_core *mlxsw_core,
return mlxsw_linecard_status_process(linecards, linecard, mddq_pl);
}
+static void mlxsw_linecards_irq_event_handler(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_linecards *linecards = mlxsw_core_linecards(mlxsw_core);
+ int i;
+
+ /* Handle change of line card active state. */
+ for (i = 0; i < linecards->count; i++) {
+ struct mlxsw_linecard *linecard = mlxsw_linecard_get(linecards,
+ i + 1);
+
+ mlxsw_linecard_status_get_and_process(mlxsw_core, linecards,
+ linecard);
+ }
+}
+
static const char * const mlxsw_linecard_status_event_type_name[] = {
[MLXSW_LINECARD_STATUS_EVENT_TYPE_PROVISION] = "provision",
[MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION] = "unprovision",
@@ -1238,7 +1253,6 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
{
struct devlink_linecard *devlink_linecard;
struct mlxsw_linecard *linecard;
- int err;
linecard = mlxsw_linecard_get(linecards, slot_index);
linecard->slot_index = slot_index;
@@ -1248,17 +1262,45 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
devlink_linecard = devlink_linecard_create(priv_to_devlink(mlxsw_core),
slot_index, &mlxsw_linecard_ops,
linecard);
- if (IS_ERR(devlink_linecard)) {
- err = PTR_ERR(devlink_linecard);
- goto err_devlink_linecard_create;
- }
+ if (IS_ERR(devlink_linecard))
+ return PTR_ERR(devlink_linecard);
+
linecard->devlink_linecard = devlink_linecard;
INIT_DELAYED_WORK(&linecard->status_event_to_dw,
&mlxsw_linecard_status_event_to_work);
+ return 0;
+}
+
+static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_linecards *linecards,
+ u8 slot_index)
+{
+ struct mlxsw_linecard *linecard;
+
+ linecard = mlxsw_linecard_get(linecards, slot_index);
+ cancel_delayed_work_sync(&linecard->status_event_to_dw);
+ /* Make sure all scheduled events are processed */
+ mlxsw_core_flush_owq();
+ if (linecard->active)
+ mlxsw_linecard_active_clear(linecard);
+ mlxsw_linecard_bdev_del(linecard);
+ devlink_linecard_destroy(linecard->devlink_linecard);
+ mutex_destroy(&linecard->lock);
+}
+
+static int
+mlxsw_linecard_event_delivery_init(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_linecards *linecards,
+ u8 slot_index)
+{
+ struct mlxsw_linecard *linecard;
+ int err;
+
+ linecard = mlxsw_linecard_get(linecards, slot_index);
err = mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, true);
if (err)
- goto err_event_delivery_set;
+ return err;
err = mlxsw_linecard_status_get_and_process(mlxsw_core, linecards,
linecard);
@@ -1269,29 +1311,18 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
err_status_get_and_process:
mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false);
-err_event_delivery_set:
- devlink_linecard_destroy(linecard->devlink_linecard);
-err_devlink_linecard_create:
- mutex_destroy(&linecard->lock);
return err;
}
-static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
- struct mlxsw_linecards *linecards,
- u8 slot_index)
+static void
+mlxsw_linecard_event_delivery_fini(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_linecards *linecards,
+ u8 slot_index)
{
struct mlxsw_linecard *linecard;
linecard = mlxsw_linecard_get(linecards, slot_index);
mlxsw_linecard_event_delivery_set(mlxsw_core, linecard, false);
- cancel_delayed_work_sync(&linecard->status_event_to_dw);
- /* Make sure all scheduled events are processed */
- mlxsw_core_flush_owq();
- if (linecard->active)
- mlxsw_linecard_active_clear(linecard);
- mlxsw_linecard_bdev_del(linecard);
- devlink_linecard_destroy(linecard->devlink_linecard);
- mutex_destroy(&linecard->lock);
}
/* LINECARDS INI BUNDLE FILE
@@ -1505,6 +1536,11 @@ int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_traps_register;
+ err = mlxsw_core_irq_event_handler_register(mlxsw_core,
+ mlxsw_linecards_irq_event_handler);
+ if (err)
+ goto err_irq_event_handler_register;
+
mlxsw_core_linecards_set(mlxsw_core, linecards);
for (i = 0; i < linecards->count; i++) {
@@ -1513,11 +1549,25 @@ int mlxsw_linecards_init(struct mlxsw_core *mlxsw_core,
goto err_linecard_init;
}
+ for (i = 0; i < linecards->count; i++) {
+ err = mlxsw_linecard_event_delivery_init(mlxsw_core, linecards,
+ i + 1);
+ if (err)
+ goto err_linecard_event_delivery_init;
+ }
+
return 0;
+err_linecard_event_delivery_init:
+ for (i--; i >= 0; i--)
+ mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1);
+ i = linecards->count;
err_linecard_init:
for (i--; i >= 0; i--)
mlxsw_linecard_fini(mlxsw_core, linecards, i + 1);
+ mlxsw_core_irq_event_handler_unregister(mlxsw_core,
+ mlxsw_linecards_irq_event_handler);
+err_irq_event_handler_register:
mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener,
ARRAY_SIZE(mlxsw_linecard_listener),
mlxsw_core);
@@ -1536,7 +1586,11 @@ void mlxsw_linecards_fini(struct mlxsw_core *mlxsw_core)
if (!linecards)
return;
for (i = 0; i < linecards->count; i++)
+ mlxsw_linecard_event_delivery_fini(mlxsw_core, linecards, i + 1);
+ for (i = 0; i < linecards->count; i++)
mlxsw_linecard_fini(mlxsw_core, linecards, i + 1);
+ mlxsw_core_irq_event_handler_unregister(mlxsw_core,
+ mlxsw_linecards_irq_event_handler);
mlxsw_core_traps_unregister(mlxsw_core, mlxsw_linecard_listener,
ARRAY_SIZE(mlxsw_linecard_listener),
mlxsw_core);
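
The line-card rework above splits bring-up into two passes over the slots: first every line card object is created, and only then is event delivery enabled per card, with the error unwind running in reverse order (and resetting the loop counter before unwinding the first pass). A small standalone sketch of the same pattern, with placeholder phase functions:

#include <stdio.h>

#define N_CARDS 4

/* Placeholder phase helpers; each returns 0 on success. */
static int card_init(int i)		{ printf("init %d\n", i); return 0; }
static void card_fini(int i)		{ printf("fini %d\n", i); }
static int card_event_enable(int i)	{ printf("enable %d\n", i); return 0; }
static void card_event_disable(int i)	{ printf("disable %d\n", i); }

int main(void)
{
	int i, err;

	for (i = 0; i < N_CARDS; i++) {
		err = card_init(i);
		if (err)
			goto err_init;
	}
	for (i = 0; i < N_CARDS; i++) {
		err = card_event_enable(i);
		if (err)
			goto err_event_enable;
	}
	return 0;

err_event_enable:
	for (i--; i >= 0; i--)
		card_event_disable(i);
	i = N_CARDS;	/* every card was initialized; unwind them all */
err_init:
	for (i--; i >= 0; i--)
		card_fini(i);
	return err;
}
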
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index ce843ea91464..716c73e4fd59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -9,6 +9,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
+#include <linux/platform_data/mlxreg.h>
#include <linux/slab.h>
#include "cmd.h"
@@ -51,6 +52,15 @@
#define MLXSW_I2C_TIMEOUT_MSECS 5000
#define MLXSW_I2C_MAX_DATA_SIZE 256
+/* The driver can be initialized by a kernel platform driver or from user
+ * space. In the first case the IRQ line number is passed through the platform
+ * data; otherwise the default IRQ line is used. The default IRQ is relevant
+ * only for a specific I2C slave address, which allows a 3.4 MHz I2C path to
+ * the chip (a special hardware feature for I2C acceleration).
+ */
+#define MLXSW_I2C_DEFAULT_IRQ 17
+#define MLXSW_FAST_I2C_SLAVE 0x37
+
/**
* struct mlxsw_i2c - device private data:
* @cmd: command attributes;
@@ -63,6 +73,9 @@
* @core: switch core pointer;
* @bus_info: bus info block;
* @block_size: maximum block size allowed to pass to under layer;
+ * @pdata: device platform data;
+ * @irq_work: interrupts work item;
+ * @irq: IRQ line number;
*/
struct mlxsw_i2c {
struct {
@@ -76,6 +89,9 @@ struct mlxsw_i2c {
struct mlxsw_core *core;
struct mlxsw_bus_info bus_info;
u16 block_size;
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct work_struct irq_work;
+ int irq;
};
#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
@@ -546,6 +562,67 @@ static void mlxsw_i2c_fini(void *bus_priv)
mlxsw_i2c->core = NULL;
}
+static void mlxsw_i2c_work_handler(struct work_struct *work)
+{
+ struct mlxsw_i2c *mlxsw_i2c;
+
+ mlxsw_i2c = container_of(work, struct mlxsw_i2c, irq_work);
+ mlxsw_core_irq_event_handlers_call(mlxsw_i2c->core);
+}
+
+static irqreturn_t mlxsw_i2c_irq_handler(int irq, void *dev)
+{
+ struct mlxsw_i2c *mlxsw_i2c = dev;
+
+ mlxsw_core_schedule_work(&mlxsw_i2c->irq_work);
+
+ /* This handler shares the IRQ line with the 'main' interrupt handler.
+ * Return IRQ_NONE here; the main handler will return IRQ_HANDLED.
+ */
+ return IRQ_NONE;
+}
+
+static int mlxsw_i2c_irq_init(struct mlxsw_i2c *mlxsw_i2c, u8 addr)
+{
+ int err;
+
+ /* Initialize the interrupt handler only if the system hotplug driver is
+ * reachable; otherwise the interrupt line is not enabled, interrupts will
+ * not be raised to the CPU, and the request_irq() call would not be valid.
+ */
+ if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG))
+ return 0;
+
+ /* Set default interrupt line. */
+ if (mlxsw_i2c->pdata && mlxsw_i2c->pdata->irq)
+ mlxsw_i2c->irq = mlxsw_i2c->pdata->irq;
+ else if (addr == MLXSW_FAST_I2C_SLAVE)
+ mlxsw_i2c->irq = MLXSW_I2C_DEFAULT_IRQ;
+
+ if (!mlxsw_i2c->irq)
+ return 0;
+
+ INIT_WORK(&mlxsw_i2c->irq_work, mlxsw_i2c_work_handler);
+ err = request_irq(mlxsw_i2c->irq, mlxsw_i2c_irq_handler,
+ IRQF_TRIGGER_FALLING | IRQF_SHARED, "mlxsw-i2c",
+ mlxsw_i2c);
+ if (err) {
+ dev_err(mlxsw_i2c->bus_info.dev, "Failed to request irq: %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlxsw_i2c_irq_fini(struct mlxsw_i2c *mlxsw_i2c)
+{
+ if (!IS_REACHABLE(CONFIG_MLXREG_HOTPLUG) || !mlxsw_i2c->irq)
+ return;
+ cancel_work_sync(&mlxsw_i2c->irq_work);
+ free_irq(mlxsw_i2c->irq, mlxsw_i2c);
+}
+
static const struct mlxsw_bus mlxsw_i2c_bus = {
.kind = "i2c",
.init = mlxsw_i2c_init,
@@ -638,17 +715,24 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
mlxsw_i2c->bus_info.dev = &client->dev;
mlxsw_i2c->bus_info.low_frequency = true;
mlxsw_i2c->dev = &client->dev;
+ mlxsw_i2c->pdata = client->dev.platform_data;
+
+ err = mlxsw_i2c_irq_init(mlxsw_i2c, client->addr);
+ if (err)
+ goto errout;
err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
&mlxsw_i2c_bus, mlxsw_i2c, false,
NULL, NULL);
if (err) {
dev_err(&client->dev, "Fail to register core bus\n");
- return err;
+ goto err_bus_device_register;
}
return 0;
+err_bus_device_register:
+ mlxsw_i2c_irq_fini(mlxsw_i2c);
errout:
mutex_destroy(&mlxsw_i2c->cmd.lock);
i2c_set_clientdata(client, NULL);
@@ -661,6 +745,7 @@ static int mlxsw_i2c_remove(struct i2c_client *client)
struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
mlxsw_core_bus_device_unregister(mlxsw_i2c->core, false);
+ mlxsw_i2c_irq_fini(mlxsw_i2c);
mutex_destroy(&mlxsw_i2c->cmd.lock);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index bb1cd4bae82e..7d3fa2883e8b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -26,20 +26,29 @@ static const struct mlxsw_fw_rev mlxsw_m_fw_rev = {
struct mlxsw_m_port;
+struct mlxsw_m_line_card {
+ bool active;
+ int module_to_port[];
+};
+
struct mlxsw_m {
struct mlxsw_m_port **ports;
- int *module_to_port;
struct mlxsw_core *core;
const struct mlxsw_bus_info *bus_info;
u8 base_mac[ETH_ALEN];
u8 max_ports;
+ u8 max_modules_per_slot; /* Maximum number of modules per-slot. */
+ u8 num_of_slots; /* Including the main board. */
+ struct mlxsw_m_line_card **line_cards;
};
struct mlxsw_m_port {
struct net_device *dev;
struct mlxsw_m *mlxsw_m;
u16 local_port;
+ u8 slot_index;
u8 module;
+ u8 module_offset;
};
static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m)
@@ -111,8 +120,9 @@ static int mlxsw_m_get_module_info(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_info(netdev, core, 0, mlxsw_m_port->module,
- modinfo);
+ return mlxsw_env_get_module_info(netdev, core,
+ mlxsw_m_port->slot_index,
+ mlxsw_m_port->module, modinfo);
}
static int
@@ -122,7 +132,8 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_eeprom(netdev, core, 0,
+ return mlxsw_env_get_module_eeprom(netdev, core,
+ mlxsw_m_port->slot_index,
mlxsw_m_port->module, ee, data);
}
@@ -134,7 +145,8 @@ mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_eeprom_by_page(core, 0,
+ return mlxsw_env_get_module_eeprom_by_page(core,
+ mlxsw_m_port->slot_index,
mlxsw_m_port->module,
page, extack);
}
@@ -144,7 +156,8 @@ static int mlxsw_m_reset(struct net_device *netdev, u32 *flags)
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_reset_module(netdev, core, 0, mlxsw_m_port->module,
+ return mlxsw_env_reset_module(netdev, core, mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
flags);
}
@@ -156,7 +169,8 @@ mlxsw_m_get_module_power_mode(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_get_module_power_mode(core, 0, mlxsw_m_port->module,
+ return mlxsw_env_get_module_power_mode(core, mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
params, extack);
}
@@ -168,7 +182,8 @@ mlxsw_m_set_module_power_mode(struct net_device *netdev,
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
- return mlxsw_env_set_module_power_mode(core, 0, mlxsw_m_port->module,
+ return mlxsw_env_set_module_power_mode(core, mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
params->policy, extack);
}
@@ -184,7 +199,7 @@ static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
static int
mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port,
- u8 *p_module, u8 *p_width)
+ u8 *p_module, u8 *p_width, u8 *p_slot_index)
{
char pmlp_pl[MLXSW_REG_PMLP_LEN];
int err;
@@ -195,6 +210,7 @@ mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u16 local_port,
return err;
*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+ *p_slot_index = mlxsw_reg_pmlp_slot_index_get(pmlp_pl, 0);
return 0;
}
@@ -212,18 +228,25 @@ mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port)
if (err)
return err;
mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, addr);
- eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1);
+ eth_hw_addr_gen(mlxsw_m_port->dev, addr, mlxsw_m_port->module + 1 +
+ mlxsw_m_port->module_offset);
return 0;
}
+static bool mlxsw_m_port_created(struct mlxsw_m *mlxsw_m, u16 local_port)
+{
+ return mlxsw_m->ports[local_port];
+}
+
static int
-mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 module)
+mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 slot_index,
+ u8 module)
{
struct mlxsw_m_port *mlxsw_m_port;
struct net_device *dev;
int err;
- err = mlxsw_core_port_init(mlxsw_m->core, local_port, 0,
+ err = mlxsw_core_port_init(mlxsw_m->core, local_port, slot_index,
module + 1, false, 0, false,
0, mlxsw_m->base_mac,
sizeof(mlxsw_m->base_mac));
@@ -246,6 +269,15 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u16 local_port, u8 module)
mlxsw_m_port->mlxsw_m = mlxsw_m;
mlxsw_m_port->local_port = local_port;
mlxsw_m_port->module = module;
+ mlxsw_m_port->slot_index = slot_index;
+ /* Add module offset for line card. The offset for the main board is zero.
+ * For a line card in slot #n the offset is calculated as (#n - 1)
+ * multiplied by the maximum number of modules that can be found on a
+ * line card.
+ */
+ mlxsw_m_port->module_offset = mlxsw_m_port->slot_index ?
+ (mlxsw_m_port->slot_index - 1) *
+ mlxsw_m->max_modules_per_slot : 0;
dev->netdev_ops = &mlxsw_m_port_netdev_ops;
dev->ethtool_ops = &mlxsw_m_port_ethtool_ops;
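
The module_offset assignment above maps a line-card module into a flat index space: the main board (slot 0) gets offset 0, and slot #n gets (n - 1) * max_modules_per_slot. A quick standalone check of the formula, with an assumed value for the per-slot module count:

#include <stdio.h>

int main(void)
{
	int max_modules_per_slot = 16;	/* assumed example value */
	int slot_index;

	for (slot_index = 0; slot_index <= 2; slot_index++) {
		int module_offset = slot_index ?
				    (slot_index - 1) * max_modules_per_slot : 0;
		printf("slot %d -> module_offset %d\n", slot_index, module_offset);
	}
	return 0;
}
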
@@ -291,19 +323,29 @@ static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u16 local_port)
mlxsw_core_port_fini(mlxsw_m->core, local_port);
}
+static int*
+mlxsw_m_port_mapping_get(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module)
+{
+ return &mlxsw_m->line_cards[slot_index]->module_to_port[module];
+}
+
static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port,
u8 *last_module)
{
unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
- u8 module, width;
+ u8 module, width, slot_index;
+ int *module_to_port;
int err;
/* Fill out to local port mapping array */
err = mlxsw_m_port_module_info_get(mlxsw_m, local_port, &module,
- &width);
+ &width, &slot_index);
if (err)
return err;
+ /* Skip if the line card has already been configured */
+ if (mlxsw_m->line_cards[slot_index]->active)
+ return 0;
if (!width)
return 0;
/* Skip, if port belongs to the cluster */
@@ -313,91 +355,216 @@ static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u16 local_port,
if (WARN_ON_ONCE(module >= max_ports))
return -EINVAL;
- mlxsw_env_module_port_map(mlxsw_m->core, 0, module);
- mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports;
+ mlxsw_env_module_port_map(mlxsw_m->core, slot_index, module);
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, module);
+ *module_to_port = local_port;
return 0;
}
-static void mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 module)
+static void
+mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index, u8 module)
{
- mlxsw_m->module_to_port[module] = -1;
- mlxsw_env_module_port_unmap(mlxsw_m->core, 0, module);
+ int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index,
+ module);
+ *module_to_port = -1;
+ mlxsw_env_module_port_unmap(mlxsw_m->core, slot_index, module);
}
-static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m)
{
unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
- u8 last_module = max_ports;
- int i;
- int err;
+ char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+ u8 num_of_modules;
+ int i, j, err;
+
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
+ err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(mgpir), mgpir_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &num_of_modules,
+ &mlxsw_m->num_of_slots);
+ /* If the system is modular, get the maximum number of modules per slot.
+ * Otherwise, get the maximum number of modules on the main board.
+ */
+ if (mlxsw_m->num_of_slots)
+ mlxsw_m->max_modules_per_slot =
+ mlxsw_reg_mgpir_max_modules_per_slot_get(mgpir_pl);
+ else
+ mlxsw_m->max_modules_per_slot = num_of_modules;
+ /* Add slot for main board. */
+ mlxsw_m->num_of_slots += 1;
mlxsw_m->ports = kcalloc(max_ports, sizeof(*mlxsw_m->ports),
GFP_KERNEL);
if (!mlxsw_m->ports)
return -ENOMEM;
- mlxsw_m->module_to_port = kmalloc_array(max_ports, sizeof(int),
- GFP_KERNEL);
- if (!mlxsw_m->module_to_port) {
- err = -ENOMEM;
- goto err_module_to_port_alloc;
+ mlxsw_m->line_cards = kcalloc(mlxsw_m->num_of_slots,
+ sizeof(*mlxsw_m->line_cards),
+ GFP_KERNEL);
+ if (!mlxsw_m->line_cards)
+ goto err_kcalloc;
+
+ for (i = 0; i < mlxsw_m->num_of_slots; i++) {
+ mlxsw_m->line_cards[i] =
+ kzalloc(struct_size(mlxsw_m->line_cards[i],
+ module_to_port,
+ mlxsw_m->max_modules_per_slot),
+ GFP_KERNEL);
+ if (!mlxsw_m->line_cards[i])
+ goto err_kmalloc_array;
+
+ /* Invalidate the entries of the module to local port mapping array. */
+ for (j = 0; j < mlxsw_m->max_modules_per_slot; j++)
+ mlxsw_m->line_cards[i]->module_to_port[j] = -1;
}
- /* Invalidate the entries of module to local port mapping array */
- for (i = 0; i < max_ports; i++)
- mlxsw_m->module_to_port[i] = -1;
+ return 0;
- /* Fill out module to local port mapping array */
- for (i = 1; i < max_ports; i++) {
- err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
- if (err)
- goto err_module_to_port_map;
+err_kmalloc_array:
+ for (i--; i >= 0; i--)
+ kfree(mlxsw_m->line_cards[i]);
+err_kcalloc:
+ kfree(mlxsw_m->ports);
+ return err;
+}
+
+static void mlxsw_m_linecards_fini(struct mlxsw_m *mlxsw_m)
+{
+ int i = mlxsw_m->num_of_slots;
+
+ for (i--; i >= 0; i--)
+ kfree(mlxsw_m->line_cards[i]);
+ kfree(mlxsw_m->line_cards);
+ kfree(mlxsw_m->ports);
+}
+
+static void
+mlxsw_m_linecard_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 slot_index)
+{
+ int i;
+
+ for (i = mlxsw_m->max_modules_per_slot - 1; i >= 0; i--) {
+ int *module_to_port;
+
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+ if (*module_to_port > 0)
+ mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i);
}
+}
- /* Create port objects for each valid entry */
- for (i = 0; i < mlxsw_m->max_ports; i++) {
- if (mlxsw_m->module_to_port[i] > 0) {
- err = mlxsw_m_port_create(mlxsw_m,
- mlxsw_m->module_to_port[i],
- i);
+static int
+mlxsw_m_linecard_ports_create(struct mlxsw_m *mlxsw_m, u8 slot_index)
+{
+ int *module_to_port;
+ int i, err;
+
+ for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) {
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+ if (*module_to_port > 0) {
+ err = mlxsw_m_port_create(mlxsw_m, *module_to_port,
+ slot_index, i);
if (err)
- goto err_module_to_port_create;
+ goto err_port_create;
+ /* Mark slot as active */
+ if (!mlxsw_m->line_cards[slot_index]->active)
+ mlxsw_m->line_cards[slot_index]->active = true;
}
}
-
return 0;
-err_module_to_port_create:
+err_port_create:
for (i--; i >= 0; i--) {
- if (mlxsw_m->module_to_port[i] > 0)
- mlxsw_m_port_remove(mlxsw_m,
- mlxsw_m->module_to_port[i]);
+ module_to_port = mlxsw_m_port_mapping_get(mlxsw_m, slot_index, i);
+ if (*module_to_port > 0 &&
+ mlxsw_m_port_created(mlxsw_m, *module_to_port)) {
+ mlxsw_m_port_remove(mlxsw_m, *module_to_port);
+ /* Mark slot as inactive */
+ if (mlxsw_m->line_cards[slot_index]->active)
+ mlxsw_m->line_cards[slot_index]->active = false;
+ }
}
- i = max_ports;
-err_module_to_port_map:
- for (i--; i > 0; i--)
- mlxsw_m_port_module_unmap(mlxsw_m, i);
- kfree(mlxsw_m->module_to_port);
-err_module_to_port_alloc:
- kfree(mlxsw_m->ports);
return err;
}
-static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+static void
+mlxsw_m_linecard_ports_remove(struct mlxsw_m *mlxsw_m, u8 slot_index)
{
int i;
- for (i = 0; i < mlxsw_m->max_ports; i++) {
- if (mlxsw_m->module_to_port[i] > 0) {
- mlxsw_m_port_remove(mlxsw_m,
- mlxsw_m->module_to_port[i]);
- mlxsw_m_port_module_unmap(mlxsw_m, i);
+ for (i = 0; i < mlxsw_m->max_modules_per_slot; i++) {
+ int *module_to_port = mlxsw_m_port_mapping_get(mlxsw_m,
+ slot_index, i);
+
+ if (*module_to_port > 0 &&
+ mlxsw_m_port_created(mlxsw_m, *module_to_port)) {
+ mlxsw_m_port_remove(mlxsw_m, *module_to_port);
+ mlxsw_m_port_module_unmap(mlxsw_m, slot_index, i);
}
}
+}
- kfree(mlxsw_m->module_to_port);
- kfree(mlxsw_m->ports);
+static int mlxsw_m_ports_module_map(struct mlxsw_m *mlxsw_m)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
+ u8 last_module = max_ports;
+ int i, err;
+
+ for (i = 1; i < max_ports; i++) {
+ err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m)
+{
+ int err;
+
+ /* Fill out module to local port mapping array */
+ err = mlxsw_m_ports_module_map(mlxsw_m);
+ if (err)
+ goto err_ports_module_map;
+
+ /* Create port objects for each valid entry */
+ err = mlxsw_m_linecard_ports_create(mlxsw_m, 0);
+ if (err)
+ goto err_linecard_ports_create;
+
+ return 0;
+
+err_linecard_ports_create:
+err_ports_module_map:
+ mlxsw_m_linecard_port_module_unmap(mlxsw_m, 0);
+
+ return err;
+}
+
+static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m)
+{
+ mlxsw_m_linecard_ports_remove(mlxsw_m, 0);
+}
+
+static void
+mlxsw_m_ports_remove_selected(struct mlxsw_core *mlxsw_core,
+ bool (*selector)(void *priv, u16 local_port),
+ void *priv)
+{
+ struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_linecard *linecard_priv = priv;
+ struct mlxsw_m_line_card *linecard;
+
+ linecard = mlxsw_m->line_cards[linecard_priv->slot_index];
+
+ if (WARN_ON(!linecard->active))
+ return;
+
+ mlxsw_m_linecard_ports_remove(mlxsw_m, linecard_priv->slot_index);
+ linecard->active = false;
}
static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
@@ -418,6 +585,60 @@ static int mlxsw_m_fw_rev_validate(struct mlxsw_m *mlxsw_m)
return -EINVAL;
}
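+/* A line card in the given slot became active: refresh the module to local
+ * port mapping and create port netdevices for the line card modules.
+ */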
+static void
+mlxsw_m_got_active(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv)
+{
+ struct mlxsw_m_line_card *linecard;
+ struct mlxsw_m *mlxsw_m = priv;
+ int err;
+
+ linecard = mlxsw_m->line_cards[slot_index];
+ /* Skip if the line card has already been configured during init */
+ if (linecard->active)
+ return;
+
+ /* Fill out module to local port mapping array */
+ err = mlxsw_m_ports_module_map(mlxsw_m);
+ if (err)
+ goto err_ports_module_map;
+
+ /* Create port objects for each valid entry */
+ err = mlxsw_m_linecard_ports_create(mlxsw_m, slot_index);
+ if (err) {
+ dev_err(mlxsw_m->bus_info->dev, "Failed to create port for line card at slot %d\n",
+ slot_index);
+ goto err_linecard_ports_create;
+ }
+
+ linecard->active = true;
+
+ return;
+
+err_linecard_ports_create:
+err_ports_module_map:
+ mlxsw_m_linecard_port_module_unmap(mlxsw_m, slot_index);
+}
+
+static void
+mlxsw_m_got_inactive(struct mlxsw_core *mlxsw_core, u8 slot_index, void *priv)
+{
+ struct mlxsw_m_line_card *linecard;
+ struct mlxsw_m *mlxsw_m = priv;
+
+ linecard = mlxsw_m->line_cards[slot_index];
+
+ if (WARN_ON(!linecard->active))
+ return;
+
+ mlxsw_m_linecard_ports_remove(mlxsw_m, slot_index);
+ linecard->active = false;
+}
+
+static struct mlxsw_linecards_event_ops mlxsw_m_event_ops = {
+ .got_active = mlxsw_m_got_active,
+ .got_inactive = mlxsw_m_got_inactive,
+};
+
static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *mlxsw_bus_info,
struct netlink_ext_ack *extack)
@@ -438,13 +659,33 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core,
return err;
}
+ err = mlxsw_m_linecards_init(mlxsw_m);
+ if (err) {
+ dev_err(mlxsw_m->bus_info->dev, "Failed to create line cards\n");
+ return err;
+ }
+
+ err = mlxsw_linecards_event_ops_register(mlxsw_core,
+ &mlxsw_m_event_ops, mlxsw_m);
+ if (err) {
+ dev_err(mlxsw_m->bus_info->dev, "Failed to register line cards operations\n");
+ goto linecards_event_ops_register;
+ }
+
err = mlxsw_m_ports_create(mlxsw_m);
if (err) {
dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n");
- return err;
+ goto err_ports_create;
}
return 0;
+
+err_ports_create:
+ mlxsw_linecards_event_ops_unregister(mlxsw_core,
+ &mlxsw_m_event_ops, mlxsw_m);
+linecards_event_ops_register:
+ mlxsw_m_linecards_fini(mlxsw_m);
+ return err;
}
static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
@@ -452,6 +693,9 @@ static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core)
struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core);
mlxsw_m_ports_remove(mlxsw_m);
+ mlxsw_linecards_event_ops_unregister(mlxsw_core,
+ &mlxsw_m_event_ops, mlxsw_m);
+ mlxsw_m_linecards_fini(mlxsw_m);
}
static const struct mlxsw_config_profile mlxsw_m_config_profile;
@@ -461,6 +705,7 @@ static struct mlxsw_driver mlxsw_m_driver = {
.priv_size = sizeof(struct mlxsw_m),
.init = mlxsw_m_init,
.fini = mlxsw_m_fini,
+ .ports_remove_selected = mlxsw_m_ports_remove_selected,
.profile = &mlxsw_m_config_profile,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index f27bdecdf952..d71d7f9a20f1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4729,25 +4729,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
*/
MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32);
-/* reg_ptys_ib_link_width_cap
- * IB port supported widths.
- * Access: RO
- */
-MLXSW_ITEM32(reg, ptys, ib_link_width_cap, 0x10, 16, 16);
-
-#define MLXSW_REG_PTYS_IB_SPEED_SDR BIT(0)
-#define MLXSW_REG_PTYS_IB_SPEED_DDR BIT(1)
-#define MLXSW_REG_PTYS_IB_SPEED_QDR BIT(2)
-#define MLXSW_REG_PTYS_IB_SPEED_FDR10 BIT(3)
-#define MLXSW_REG_PTYS_IB_SPEED_FDR BIT(4)
-#define MLXSW_REG_PTYS_IB_SPEED_EDR BIT(5)
-
-/* reg_ptys_ib_proto_cap
- * IB port supported speeds and protocols.
- * Access: RO
- */
-MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16);
-
/* reg_ptys_ext_eth_proto_admin
* Extended speed and protocol to set port to.
* Access: RW
@@ -4760,18 +4741,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_admin, 0x14, 0, 32);
*/
MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32);
-/* reg_ptys_ib_link_width_admin
- * IB width to set port to.
- * Access: RW
- */
-MLXSW_ITEM32(reg, ptys, ib_link_width_admin, 0x1C, 16, 16);
-
-/* reg_ptys_ib_proto_admin
- * IB speeds and protocols to set port to.
- * Access: RW
- */
-MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16);
-
/* reg_ptys_ext_eth_proto_oper
* The extended current speed and protocol configured for the port.
* Access: RO
@@ -4784,18 +4753,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_oper, 0x20, 0, 32);
*/
MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
-/* reg_ptys_ib_link_width_oper
- * The current IB width to set port to.
- * Access: RO
- */
-MLXSW_ITEM32(reg, ptys, ib_link_width_oper, 0x28, 16, 16);
-
-/* reg_ptys_ib_proto_oper
- * The current IB speed and protocol.
- * Access: RO
- */
-MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
-
enum mlxsw_reg_ptys_connector_type {
MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR,
MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE,
@@ -4866,33 +4823,6 @@ static inline void mlxsw_reg_ptys_ext_eth_unpack(char *payload,
mlxsw_reg_ptys_ext_eth_proto_oper_get(payload);
}
-static inline void mlxsw_reg_ptys_ib_pack(char *payload, u16 local_port,
- u16 proto_admin, u16 link_width)
-{
- MLXSW_REG_ZERO(ptys, payload);
- mlxsw_reg_ptys_local_port_set(payload, local_port);
- mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_IB);
- mlxsw_reg_ptys_ib_proto_admin_set(payload, proto_admin);
- mlxsw_reg_ptys_ib_link_width_admin_set(payload, link_width);
-}
-
-static inline void mlxsw_reg_ptys_ib_unpack(char *payload, u16 *p_ib_proto_cap,
- u16 *p_ib_link_width_cap,
- u16 *p_ib_proto_oper,
- u16 *p_ib_link_width_oper)
-{
- if (p_ib_proto_cap)
- *p_ib_proto_cap = mlxsw_reg_ptys_ib_proto_cap_get(payload);
- if (p_ib_link_width_cap)
- *p_ib_link_width_cap =
- mlxsw_reg_ptys_ib_link_width_cap_get(payload);
- if (p_ib_proto_oper)
- *p_ib_proto_oper = mlxsw_reg_ptys_ib_proto_oper_get(payload);
- if (p_ib_link_width_oper)
- *p_ib_link_width_oper =
- mlxsw_reg_ptys_ib_link_width_oper_get(payload);
-}
-
/* PPAD - Port Physical Address Register
* -------------------------------------
* The PPAD register configures the per port physical MAC address.
@@ -5666,27 +5596,6 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u16 local_port,
mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc);
}
-/* PLIB - Port Local to InfiniBand Port
- * ------------------------------------
- * The PLIB register performs mapping from Local Port into InfiniBand Port.
- */
-#define MLXSW_REG_PLIB_ID 0x500A
-#define MLXSW_REG_PLIB_LEN 0x10
-
-MLXSW_REG_DEFINE(plib, MLXSW_REG_PLIB_ID, MLXSW_REG_PLIB_LEN);
-
-/* reg_plib_local_port
- * Local port number.
- * Access: Index
- */
-MLXSW_ITEM32_LP(reg, plib, 0x00, 16, 0x00, 12);
-
-/* reg_plib_ib_port
- * InfiniBand port remapping for local_port.
- * Access: RW
- */
-MLXSW_ITEM32(reg, plib, ib_port, 0x00, 0, 8);
-
/* PPTB - Port Prio To Buffer Register
* -----------------------------------
* Configures the switch priority to buffer table.
@@ -12962,7 +12871,6 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(paos),
MLXSW_REG(pfcc),
MLXSW_REG(ppcnt),
- MLXSW_REG(plib),
MLXSW_REG(pptb),
MLXSW_REG(pbmc),
MLXSW_REG(pspa),
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 82d55fc27edc..70bc7253454f 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -413,7 +413,8 @@ static int ks8851_probe_spi(struct spi_device *spi)
spi->bits_per_word = 8;
- ks = netdev_priv(netdev);
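+ /* netdev_priv() returns the SPI-specific state, which embeds the
+ * generic ks8851 state.
+ */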
+ kss = netdev_priv(netdev);
+ ks = &kss->ks8851;
ks->lock = ks8851_lock_spi;
ks->unlock = ks8851_unlock_spi;
@@ -433,8 +434,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
IRQ_RXPSI) /* RX process stop */
ks->rc_ier = STD_IRQ;
- kss = to_ks8851_spi(ks);
-
kss->spidev = spi;
mutex_init(&kss->lock);
INIT_WORK(&kss->tx_work, ks8851_tx_work);
diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig
index 4241ff0e5098..49e1464a4313 100644
--- a/drivers/net/ethernet/microchip/lan966x/Kconfig
+++ b/drivers/net/ethernet/microchip/lan966x/Kconfig
@@ -4,6 +4,7 @@ config LAN966X_SWITCH
depends on HAS_IOMEM
depends on OF
depends on NET_SWITCHDEV
+ depends on BRIDGE || BRIDGE=n
select PHYLINK
select PACKING
help
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
index fd2e0ebb2427..0c22c86bdaa9 100644
--- a/drivers/net/ethernet/microchip/lan966x/Makefile
+++ b/drivers/net/ethernet/microchip/lan966x/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \
lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
- lan966x_ptp.o lan966x_fdma.o
+ lan966x_ptp.o lan966x_fdma.o lan966x_lag.o
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
index da5ca7188679..2ea263e893ee 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
@@ -8,6 +8,7 @@ struct lan966x_fdb_event_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct net_device *dev;
+ struct net_device *orig_dev;
struct lan966x *lan966x;
unsigned long event;
};
@@ -127,75 +128,119 @@ void lan966x_fdb_deinit(struct lan966x *lan966x)
lan966x_fdb_purge_entries(lan966x);
}
-static void lan966x_fdb_event_work(struct work_struct *work)
+void lan966x_fdb_flush_workqueue(struct lan966x *lan966x)
+{
+ flush_workqueue(lan966x->fdb_work);
+}
+
+static void lan966x_fdb_port_event_work(struct lan966x_fdb_event_work *fdb_work)
{
- struct lan966x_fdb_event_work *fdb_work =
- container_of(work, struct lan966x_fdb_event_work, work);
struct switchdev_notifier_fdb_info *fdb_info;
- struct net_device *dev = fdb_work->dev;
struct lan966x_port *port;
struct lan966x *lan966x;
- int ret;
- fdb_info = &fdb_work->fdb_info;
lan966x = fdb_work->lan966x;
+ port = netdev_priv(fdb_work->orig_dev);
+ fdb_info = &fdb_work->fdb_info;
- if (lan966x_netdevice_check(dev)) {
- port = netdev_priv(dev);
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ if (!fdb_info->added_by_user)
+ break;
+ lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
+ break;
+ lan966x_mac_del_entry(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ }
+}
+
+static void lan966x_fdb_bridge_event_work(struct lan966x_fdb_event_work *fdb_work)
+{
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct lan966x *lan966x;
+ int ret;
- switch (fdb_work->event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
- fdb_info->vid);
+ lan966x = fdb_work->lan966x;
+ fdb_info = &fdb_work->fdb_info;
+
+ /* The fdb event was generated by the bridge device itself */
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ /* If there is no front port in this vlan, there is no
+ * point in copying the frame to the CPU because it
+ * would just be dropped later. Still store the fdb
+ * entry, so it can be used once a port actually joins
+ * the vlan.
+ */
+ lan966x_fdb_add_entry(lan966x, fdb_info);
+ if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ fdb_info->vid))
break;
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- lan966x_mac_del_entry(lan966x, fdb_info->addr,
- fdb_info->vid);
+
+ lan966x_mac_cpu_learn(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ ret = lan966x_fdb_del_entry(lan966x, fdb_info);
+ if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ fdb_info->vid))
break;
- }
- } else {
- if (!netif_is_bridge_master(dev))
- goto out;
-
- /* In case the bridge is called */
- switch (fdb_work->event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- /* If there is no front port in this vlan, there is no
- * point to copy the frame to CPU because it would be
- * just dropped at later point. So add it only if
- * there is a port but it is required to store the fdb
- * entry for later point when a port actually gets in
- * the vlan.
- */
- lan966x_fdb_add_entry(lan966x, fdb_info);
- if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
- fdb_info->vid))
- break;
-
- lan966x_mac_cpu_learn(lan966x, fdb_info->addr,
- fdb_info->vid);
+
+ if (ret)
+ lan966x_mac_cpu_forget(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ }
+}
+
+static void lan966x_fdb_lag_event_work(struct lan966x_fdb_event_work *fdb_work)
+{
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct lan966x_port *port;
+ struct lan966x *lan966x;
+
+ if (!lan966x_lag_first_port(fdb_work->orig_dev, fdb_work->dev))
+ return;
+
+ lan966x = fdb_work->lan966x;
+ port = netdev_priv(fdb_work->dev);
+ fdb_info = &fdb_work->fdb_info;
+
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ if (!fdb_info->added_by_user)
break;
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- ret = lan966x_fdb_del_entry(lan966x, fdb_info);
- if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
- fdb_info->vid))
- break;
-
- if (ret)
- lan966x_mac_cpu_forget(lan966x, fdb_info->addr,
- fdb_info->vid);
+ lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
break;
- }
+ lan966x_mac_del_entry(lan966x, fdb_info->addr, fdb_info->vid);
+ break;
}
+}
+
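+/* Dispatch a deferred fdb event to the handler matching the device that
+ * generated it: switch port, bridge master or LAG master.
+ */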
+static void lan966x_fdb_event_work(struct work_struct *work)
+{
+ struct lan966x_fdb_event_work *fdb_work =
+ container_of(work, struct lan966x_fdb_event_work, work);
+
+ if (lan966x_netdevice_check(fdb_work->orig_dev))
+ lan966x_fdb_port_event_work(fdb_work);
+ else if (netif_is_bridge_master(fdb_work->orig_dev))
+ lan966x_fdb_bridge_event_work(fdb_work);
+ else if (netif_is_lag_master(fdb_work->orig_dev))
+ lan966x_fdb_lag_event_work(fdb_work);
-out:
kfree(fdb_work->fdb_info.addr);
kfree(fdb_work);
- dev_put(dev);
}
int lan966x_handle_fdb(struct net_device *dev,
@@ -221,7 +266,8 @@ int lan966x_handle_fdb(struct net_device *dev,
if (!fdb_work)
return -ENOMEM;
- fdb_work->dev = orig_dev;
+ fdb_work->dev = dev;
+ fdb_work->orig_dev = orig_dev;
fdb_work->lan966x = lan966x;
fdb_work->event = event;
INIT_WORK(&fdb_work->work, lan966x_fdb_event_work);
@@ -231,7 +277,6 @@ int lan966x_handle_fdb(struct net_device *dev,
goto err_addr_alloc;
ether_addr_copy((u8 *)fdb_work->fdb_info.addr, fdb_info->addr);
- dev_hold(orig_dev);
queue_work(lan966x->fdb_work, &fdb_work->work);
break;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
new file mode 100644
index 000000000000..41fa2523d91d
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/if_bridge.h>
+
+#include "lan966x_main.h"
+
+static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
+{
+ u32 visited = GENMASK(lan966x->num_phys_ports - 1, 0);
+ int p, lag, i;
+
+ /* Reset destination and aggregation PGIDS */
+ for (p = 0; p < lan966x->num_phys_ports; ++p)
+ lan_wr(ANA_PGID_PGID_SET(BIT(p)),
+ lan966x, ANA_PGID(p));
+
+ for (p = PGID_AGGR; p < PGID_SRC; ++p)
+ lan_wr(ANA_PGID_PGID_SET(visited),
+ lan966x, ANA_PGID(p));
+
+ /* The visited ports bitmask holds the list of ports offloading any
+ * bonding interface. Initially we mark all these ports as unvisited,
+ * then every time we visit a port in this bitmask, we know that it is
+ * the lowest numbered port, i.e. the one whose logical ID == physical
+ * port ID == LAG ID. So we mark as visited all further ports in the
+ * bitmask that are offloading the same bonding interface. This way,
+ * we set up the aggregation PGIDs only once per bonding interface.
+ */
+ for (p = 0; p < lan966x->num_phys_ports; ++p) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ if (!port || !port->bond)
+ continue;
+
+ visited &= ~BIT(p);
+ }
+
+ /* Now, set PGIDs for each active LAG */
+ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
+ struct net_device *bond = lan966x->ports[lag]->bond;
+ int num_active_ports = 0;
+ unsigned long bond_mask;
+ u8 aggr_idx[16];
+
+ if (!bond || (visited & BIT(lag)))
+ continue;
+
+ bond_mask = lan966x_lag_get_mask(lan966x, bond);
+
+ for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ lan_wr(ANA_PGID_PGID_SET(bond_mask),
+ lan966x, ANA_PGID(p));
+ if (port->lag_tx_active)
+ aggr_idx[num_active_ports++] = p;
+ }
+
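+ /* Distribute the aggregation codes of this LAG among its active
+ * ports in a round-robin fashion, so hashed traffic is spread
+ * across all active members.
+ */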
+ for (i = PGID_AGGR; i < PGID_SRC; ++i) {
+ u32 ac;
+
+ ac = lan_rd(lan966x, ANA_PGID(i));
+ ac &= ~bond_mask;
+ /* Don't do division by zero if there was no active
+ * port. Just make all aggregation codes zero.
+ */
+ if (num_active_ports)
+ ac |= BIT(aggr_idx[i % num_active_ports]);
+ lan_wr(ANA_PGID_PGID_SET(ac),
+ lan966x, ANA_PGID(i));
+ }
+
+ /* Mark all ports in the same LAG as visited to avoid applying
+ * the same config again.
+ */
+ for (p = lag; p < lan966x->num_phys_ports; p++) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ if (!port)
+ continue;
+
+ if (port->bond == bond)
+ visited |= BIT(p);
+ }
+ }
+}
+
+static void lan966x_lag_set_port_ids(struct lan966x *lan966x)
+{
+ struct lan966x_port *port;
+ u32 bond_mask;
+ u32 lag_id;
+ int p;
+
+ for (p = 0; p < lan966x->num_phys_ports; ++p) {
+ port = lan966x->ports[p];
+ if (!port)
+ continue;
+
+ lag_id = port->chip_port;
+
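+ /* Ports that are members of a bond use the lowest-numbered member
+ * port as their logical port ID (the LAG ID).
+ */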
+ bond_mask = lan966x_lag_get_mask(lan966x, port->bond);
+ if (bond_mask)
+ lag_id = __ffs(bond_mask);
+
+ lan_rmw(ANA_PORT_CFG_PORTID_VAL_SET(lag_id),
+ ANA_PORT_CFG_PORTID_VAL,
+ lan966x, ANA_PORT_CFG(port->chip_port));
+ }
+}
+
+static void lan966x_lag_update_ids(struct lan966x *lan966x)
+{
+ lan966x_lag_set_port_ids(lan966x);
+ lan966x_update_fwd_mask(lan966x);
+ lan966x_lag_set_aggr_pgids(lan966x);
+}
+
+int lan966x_lag_port_join(struct lan966x_port *port,
+ struct net_device *brport_dev,
+ struct net_device *bond,
+ struct netlink_ext_ack *extack)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct net_device *dev = port->dev;
+ u32 lag_id = -1;
+ u32 bond_mask;
+ int err;
+
+ bond_mask = lan966x_lag_get_mask(lan966x, bond);
+ if (bond_mask)
+ lag_id = __ffs(bond_mask);
+
+ port->bond = bond;
+ lan966x_lag_update_ids(lan966x);
+
+ err = switchdev_bridge_port_offload(brport_dev, dev, port,
+ &lan966x_switchdev_nb,
+ &lan966x_switchdev_blocking_nb,
+ false, extack);
+ if (err)
+ goto out;
+
+ lan966x_port_stp_state_set(port, br_port_get_stp_state(brport_dev));
+
+ if (lan966x_lag_first_port(port->bond, port->dev) &&
+ lag_id != -1)
+ lan966x_mac_lag_replace_port_entry(lan966x,
+ lan966x->ports[lag_id],
+ port);
+
+ return 0;
+
+out:
+ port->bond = NULL;
+ lan966x_lag_update_ids(lan966x);
+
+ return err;
+}
+
+void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond)
+{
+ struct lan966x *lan966x = port->lan966x;
+ u32 bond_mask;
+ u32 lag_id;
+
+ if (lan966x_lag_first_port(port->bond, port->dev)) {
+ bond_mask = lan966x_lag_get_mask(lan966x, port->bond);
+ bond_mask &= ~BIT(port->chip_port);
+ if (bond_mask) {
+ lag_id = __ffs(bond_mask);
+ lan966x_mac_lag_replace_port_entry(lan966x, port,
+ lan966x->ports[lag_id]);
+ } else {
+ lan966x_mac_lag_remove_port_entry(lan966x, port);
+ }
+ }
+
+ port->bond = NULL;
+ lan966x_lag_update_ids(lan966x);
+ lan966x_port_stp_state_set(port, BR_STATE_FORWARDING);
+}
+
+static bool lan966x_lag_port_check_hash_types(struct lan966x *lan966x,
+ enum netdev_lag_hash hash_type)
+{
+ int p;
+
+ for (p = 0; p < lan966x->num_phys_ports; ++p) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ if (!port || !port->bond)
+ continue;
+
+ if (port->hash_type != hash_type)
+ return false;
+ }
+
+ return true;
+}
+
+int lan966x_lag_port_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ struct netdev_lag_upper_info *lui;
+ struct netlink_ext_ack *extack;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+ lui = info->upper_info;
+ if (!lui) {
+ port->hash_type = NETDEV_LAG_HASH_NONE;
+ return NOTIFY_DONE;
+ }
+
+ if (lui->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG device using unsupported Tx type");
+ return -EINVAL;
+ }
+
+ if (!lan966x_lag_port_check_hash_types(lan966x, lui->hash_type)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG devices can have only the same hash_type");
+ return -EINVAL;
+ }
+
+ switch (lui->hash_type) {
+ case NETDEV_LAG_HASH_L2:
+ lan_wr(ANA_AGGR_CFG_AC_DMAC_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_SMAC_ENA_SET(1),
+ lan966x, ANA_AGGR_CFG);
+ break;
+ case NETDEV_LAG_HASH_L34:
+ lan_wr(ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA_SET(1),
+ lan966x, ANA_AGGR_CFG);
+ break;
+ case NETDEV_LAG_HASH_L23:
+ lan_wr(ANA_AGGR_CFG_AC_DMAC_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_SMAC_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(1),
+ lan966x, ANA_AGGR_CFG);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG device using unsupported hash type");
+ return -EINVAL;
+ }
+
+ port->hash_type = lui->hash_type;
+
+ return NOTIFY_OK;
+}
+
+int lan966x_lag_port_changelowerstate(struct net_device *dev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag = info->lower_state_info;
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ bool is_active;
+
+ if (!port->bond)
+ return NOTIFY_DONE;
+
+ is_active = lag->link_up && lag->tx_enabled;
+ if (port->lag_tx_active == is_active)
+ return NOTIFY_DONE;
+
+ port->lag_tx_active = is_active;
+ lan966x_lag_set_aggr_pgids(lan966x);
+
+ return NOTIFY_OK;
+}
+
+int lan966x_lag_netdev_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct lan966x_port *port;
+ struct net_device *lower;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!lan966x_netdevice_check(lower))
+ continue;
+
+ port = netdev_priv(lower);
+ if (port->bond != dev)
+ continue;
+
+ err = lan966x_port_prechangeupper(lower, dev, info);
+ if (err)
+ return err;
+ }
+
+ return NOTIFY_DONE;
+}
+
+int lan966x_lag_netdev_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct lan966x_port *port;
+ struct net_device *lower;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!lan966x_netdevice_check(lower))
+ continue;
+
+ port = netdev_priv(lower);
+ if (port->bond != dev)
+ continue;
+
+ err = lan966x_port_changeupper(lower, dev, info);
+ if (err)
+ return err;
+ }
+
+ return NOTIFY_DONE;
+}
+
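+/* Return true if @dev is the lowest-numbered lan966x port that is a member
+ * of the LAG device @lag, i.e. the port that owns the LAG ID.
+ */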
+bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ unsigned long bond_mask;
+
+ if (port->bond != lag)
+ return false;
+
+ bond_mask = lan966x_lag_get_mask(lan966x, lag);
+ if (bond_mask && port->chip_port == __ffs(bond_mask))
+ return true;
+
+ return false;
+}
+
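+/* Return the bitmask of lan966x physical ports that are members of @bond. */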
+u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond)
+{
+ struct lan966x_port *port;
+ u32 mask = 0;
+ int p;
+
+ if (!bond)
+ return mask;
+
+ for (p = 0; p < lan966x->num_phys_ports; p++) {
+ port = lan966x->ports[p];
+ if (!port)
+ continue;
+
+ if (port->bond == bond)
+ mask |= BIT(p);
+ }
+
+ return mask;
+}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index 5893770bfd94..baa3a30c039f 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -22,6 +22,7 @@ struct lan966x_mac_entry {
u16 vid;
u16 port_index;
int row;
+ bool lag;
};
struct lan966x_mac_raw_entry {
@@ -69,15 +70,14 @@ static void lan966x_mac_select(struct lan966x *lan966x,
lan_wr(mach, lan966x, ANA_MACHDATA);
}
-static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
- bool cpu_copy,
- const unsigned char mac[ETH_ALEN],
- unsigned int vid,
- enum macaccess_entry_type type)
+static int __lan966x_mac_learn_locked(struct lan966x *lan966x, int pgid,
+ bool cpu_copy,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
{
- int ret;
+ lockdep_assert_held(&lan966x->mac_lock);
- spin_lock(&lan966x->mac_lock);
lan966x_mac_select(lan966x, mac, vid);
/* Issue a write command */
@@ -89,7 +89,19 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN),
lan966x, ANA_MACACCESS);
- ret = lan966x_mac_wait_for_completion(lan966x);
+ return lan966x_mac_wait_for_completion(lan966x);
+}
+
+static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
+ bool cpu_copy,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
+ ret = __lan966x_mac_learn_locked(lan966x, pgid, cpu_copy, mac, vid, type);
spin_unlock(&lan966x->mac_lock);
return ret;
@@ -119,6 +131,16 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port,
return __lan966x_mac_learn(lan966x, port, false, mac, vid, type);
}
+static int lan966x_mac_learn_locked(struct lan966x *lan966x, int port,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ WARN_ON(type != ENTRYTYPE_NORMAL && type != ENTRYTYPE_LOCKED);
+
+ return __lan966x_mac_learn_locked(lan966x, port, false, mac, vid, type);
+}
+
static int lan966x_mac_forget_locked(struct lan966x *lan966x,
const unsigned char mac[ETH_ALEN],
unsigned int vid,
@@ -178,8 +200,9 @@ void lan966x_mac_init(struct lan966x *lan966x)
INIT_LIST_HEAD(&lan966x->mac_entries);
}
-static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *mac,
- u16 vid, u16 port_index)
+static struct lan966x_mac_entry *lan966x_mac_alloc_entry(struct lan966x_port *port,
+ const unsigned char *mac,
+ u16 vid)
{
struct lan966x_mac_entry *mac_entry;
@@ -189,8 +212,9 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma
memcpy(mac_entry->mac, mac, ETH_ALEN);
mac_entry->vid = vid;
- mac_entry->port_index = port_index;
+ mac_entry->port_index = port->chip_port;
mac_entry->row = LAN966X_MAC_INVALID_ROW;
+ mac_entry->lag = port->bond ? true : false;
return mac_entry;
}
@@ -269,7 +293,7 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
goto mac_learn;
}
- mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
+ mac_entry = lan966x_mac_alloc_entry(port, addr, vid);
if (!mac_entry) {
spin_unlock(&lan966x->mac_lock);
return -ENOMEM;
@@ -278,7 +302,8 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
- lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
+ lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid,
+ port->bond ?: port->dev);
mac_learn:
lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
@@ -309,6 +334,50 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr,
return 0;
}
+void lan966x_mac_lag_replace_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src,
+ struct lan966x_port *dst)
+{
+ struct lan966x_mac_entry *mac_entry;
+
+ spin_lock(&lan966x->mac_lock);
+ list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
+ if (mac_entry->port_index == src->chip_port &&
+ mac_entry->lag) {
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid,
+ ENTRYTYPE_LOCKED);
+
+ lan966x_mac_learn_locked(lan966x, dst->chip_port,
+ mac_entry->mac, mac_entry->vid,
+ ENTRYTYPE_LOCKED);
+ mac_entry->port_index = dst->chip_port;
+ }
+ }
+ spin_unlock(&lan966x->mac_lock);
+}
+
+void lan966x_mac_lag_remove_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src)
+{
+ struct lan966x_mac_entry *mac_entry, *tmp;
+
+ spin_lock(&lan966x->mac_lock);
+ list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
+ list) {
+ if (mac_entry->port_index == src->chip_port &&
+ mac_entry->lag) {
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid,
+ ENTRYTYPE_LOCKED);
+
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+ }
+ spin_unlock(&lan966x->mac_lock);
+}
+
void lan966x_mac_purge_entries(struct lan966x *lan966x)
{
struct lan966x_mac_entry *mac_entry, *tmp;
@@ -354,6 +423,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
struct lan966x_mac_entry *mac_entry, *tmp;
unsigned char mac[ETH_ALEN] __aligned(2);
struct list_head mac_deleted_entries;
+ struct lan966x_port *port;
u32 dest_idx;
u32 column;
u16 vid;
@@ -406,9 +476,10 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
/* Notify the bridge that the entry doesn't exist
* anymore in the HW
*/
+ port = lan966x->ports[mac_entry->port_index];
lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
mac_entry->mac, mac_entry->vid,
- lan966x->ports[mac_entry->port_index]->dev);
+ port->bond ?: port->dev);
list_del(&mac_entry->list);
kfree(mac_entry);
}
@@ -440,7 +511,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
continue;
}
- mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
+ port = lan966x->ports[dest_idx];
+ mac_entry = lan966x_mac_alloc_entry(port, mac, vid);
if (!mac_entry) {
spin_unlock(&lan966x->mac_lock);
return;
@@ -451,7 +523,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
spin_unlock(&lan966x->mac_lock);
lan966x_mac_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
- mac, vid, lan966x->ports[dest_idx]->dev);
+ mac, vid, port->bond ?: port->dev);
}
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index d928b75f3780..2ad078608c45 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -778,6 +778,8 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
port->phylink_config.supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_QSGMII,
port->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_QUSGMII,
+ port->phylink_config.supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_1000BASEX,
port->phylink_config.supported_interfaces);
__set_bit(PHY_INTERFACE_MODE_2500BASEX,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 2787055c1847..6135d311c407 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -292,11 +292,17 @@ struct lan966x_port {
u8 ptp_cmd;
u16 ts_id;
struct sk_buff_head tx_skbs;
+
+ struct net_device *bond;
+ bool lag_tx_active;
+ enum netdev_lag_hash hash_type;
};
extern const struct phylink_mac_ops lan966x_phylink_mac_ops;
extern const struct phylink_pcs_ops lan966x_phylink_pcs_ops;
extern const struct ethtool_ops lan966x_ethtool_ops;
+extern struct notifier_block lan966x_switchdev_nb __read_mostly;
+extern struct notifier_block lan966x_switchdev_blocking_nb __read_mostly;
bool lan966x_netdevice_check(const struct net_device *dev);
@@ -345,6 +351,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x,
struct lan966x_port *port,
const unsigned char *addr,
u16 vid);
+void lan966x_mac_lag_replace_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src,
+ struct lan966x_port *dst);
+void lan966x_mac_lag_remove_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src);
void lan966x_mac_purge_entries(struct lan966x *lan966x);
irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x);
@@ -369,6 +380,7 @@ void lan966x_fdb_write_entries(struct lan966x *lan966x, u16 vid);
void lan966x_fdb_erase_entries(struct lan966x *lan966x, u16 vid);
int lan966x_fdb_init(struct lan966x *lan966x);
void lan966x_fdb_deinit(struct lan966x *lan966x);
+void lan966x_fdb_flush_workqueue(struct lan966x *lan966x);
int lan966x_handle_fdb(struct net_device *dev,
struct net_device *orig_dev,
unsigned long event, const void *ctx,
@@ -406,6 +418,33 @@ int lan966x_fdma_init(struct lan966x *lan966x);
void lan966x_fdma_deinit(struct lan966x *lan966x);
irqreturn_t lan966x_fdma_irq_handler(int irq, void *args);
+int lan966x_lag_port_join(struct lan966x_port *port,
+ struct net_device *brport_dev,
+ struct net_device *bond,
+ struct netlink_ext_ack *extack);
+void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond);
+int lan966x_lag_port_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info);
+int lan966x_lag_port_changelowerstate(struct net_device *dev,
+ struct netdev_notifier_changelowerstate_info *info);
+int lan966x_lag_netdev_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info);
+int lan966x_lag_netdev_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info);
+bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev);
+u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond);
+
+int lan966x_port_changeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info);
+int lan966x_port_prechangeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info);
+void lan966x_port_stp_state_set(struct lan966x_port *port, u8 state);
+void lan966x_port_ageing_set(struct lan966x_port *port,
+ unsigned long ageing_clock_t);
+void lan966x_update_fwd_mask(struct lan966x *lan966x);
+
static inline void __iomem *lan_addr(void __iomem *base[],
int id, int tinst, int tcnt,
int gbase, int ginst,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c
index 38a7e95d69b4..87f3d3a57aed 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c
@@ -28,11 +28,12 @@ static int lan966x_phylink_mac_prepare(struct phylink_config *config,
phy_interface_t iface)
{
struct lan966x_port *port = netdev_priv(to_net_dev(config->dev));
+ phy_interface_t serdes_mode = iface;
int err;
if (port->serdes) {
err = phy_set_mode_ext(port->serdes, PHY_MODE_ETHERNET,
- iface);
+ serdes_mode);
if (err) {
netdev_err(to_net_dev(config->dev),
"Could not set mode of SerDes\n");
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
index f141644e4372..bbf42fc8c8d5 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
@@ -168,7 +168,7 @@ static void lan966x_port_link_up(struct lan966x_port *port)
/* Also the GIGA_MODE_ENA(1) needs to be set regardless of the
* port speed for QSGMII ports.
*/
- if (config->portmode == PHY_INTERFACE_MODE_QSGMII)
+ if (phy_interface_num_ports(config->portmode) == 4)
mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1);
lan_wr(config->duplex | mode,
@@ -331,10 +331,14 @@ int lan966x_port_pcs_set(struct lan966x_port *port,
struct lan966x *lan966x = port->lan966x;
bool inband_aneg = false;
bool outband;
+ bool full_preamble = false;
+
+ if (config->portmode == PHY_INTERFACE_MODE_QUSGMII)
+ full_preamble = true;
if (config->inband) {
if (config->portmode == PHY_INTERFACE_MODE_SGMII ||
- config->portmode == PHY_INTERFACE_MODE_QSGMII)
+ phy_interface_num_ports(config->portmode) == 4)
inband_aneg = true; /* Cisco-SGMII in-band-aneg */
else if (config->portmode == PHY_INTERFACE_MODE_1000BASEX &&
config->autoneg)
@@ -345,9 +349,15 @@ int lan966x_port_pcs_set(struct lan966x_port *port,
outband = true;
}
- /* Disable or enable inband */
- lan_rmw(DEV_PCS1G_MODE_CFG_SGMII_MODE_ENA_SET(outband),
- DEV_PCS1G_MODE_CFG_SGMII_MODE_ENA,
+ /* Disable or enable inband.
+ * For QUSGMII, we rely on the preamble to transmit data such as
+ * timestamps, therefore force full preamble transmission, and prevent
+ * preamble shortening.
+ */
+ lan_rmw(DEV_PCS1G_MODE_CFG_SGMII_MODE_ENA_SET(outband) |
+ DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA_SET(full_preamble),
+ DEV_PCS1G_MODE_CFG_SGMII_MODE_ENA |
+ DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA,
lan966x, DEV_PCS1G_MODE_CFG(port->chip_port));
/* Enable PCS */
@@ -396,7 +406,7 @@ void lan966x_port_init(struct lan966x_port *port)
if (lan966x->fdma)
lan966x_fdma_netdev_init(lan966x, port->dev);
- if (config->portmode != PHY_INTERFACE_MODE_QSGMII)
+ if (phy_interface_num_ports(config->portmode) != 4)
return;
lan_rmw(DEV_CLOCK_CFG_PCS_RX_RST_SET(0) |
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
index 8265ad89f0bc..f2d83fc540d2 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
@@ -363,6 +363,51 @@ enum lan966x_target {
#define ANA_PFC_CFG_FC_LINK_SPEED_GET(x)\
FIELD_GET(ANA_PFC_CFG_FC_LINK_SPEED, x)
+/* ANA:COMMON:AGGR_CFG */
+#define ANA_AGGR_CFG __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 0, 0, 1, 4)
+
+#define ANA_AGGR_CFG_AC_RND_ENA BIT(6)
+#define ANA_AGGR_CFG_AC_RND_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_RND_ENA, x)
+#define ANA_AGGR_CFG_AC_RND_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_RND_ENA, x)
+
+#define ANA_AGGR_CFG_AC_DMAC_ENA BIT(5)
+#define ANA_AGGR_CFG_AC_DMAC_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_DMAC_ENA, x)
+#define ANA_AGGR_CFG_AC_DMAC_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_DMAC_ENA, x)
+
+#define ANA_AGGR_CFG_AC_SMAC_ENA BIT(4)
+#define ANA_AGGR_CFG_AC_SMAC_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_SMAC_ENA, x)
+#define ANA_AGGR_CFG_AC_SMAC_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_SMAC_ENA, x)
+
+#define ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA BIT(3)
+#define ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA, x)
+#define ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA, x)
+
+#define ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA BIT(2)
+#define ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA, x)
+#define ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA, x)
+
+#define ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA BIT(1)
+#define ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA, x)
+#define ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA, x)
+
+#define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA BIT(0)
+#define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(x)\
+ FIELD_PREP(ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, x)
+#define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_GET(x)\
+ FIELD_GET(ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, x)
+
/* CHIP_TOP:CUPHY_CFG:CUPHY_PORT_CFG */
#define CHIP_TOP_CUPHY_PORT_CFG(r) __REG(TARGET_CHIP_TOP, 0, 1, 16, 0, 1, 20, 8, r, 2, 4)
@@ -504,6 +549,12 @@ enum lan966x_target {
#define DEV_PCS1G_MODE_CFG_SGMII_MODE_ENA_GET(x)\
FIELD_GET(DEV_PCS1G_MODE_CFG_SGMII_MODE_ENA, x)
+#define DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA BIT(1)
+#define DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA_SET(x)\
+ FIELD_PREP(DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA, x)
+#define DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA_GET(x)\
+ FIELD_GET(DEV_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA, x)
+
/* DEV:PCS1G_CFG_STATUS:PCS1G_SD_CFG */
#define DEV_PCS1G_SD_CFG(t) __REG(TARGET_DEV, t, 8, 72, 0, 1, 68, 8, 0, 1, 4)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index df2bee678559..1c88120eb291 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -6,8 +6,6 @@
#include "lan966x_main.h"
static struct notifier_block lan966x_netdevice_nb __read_mostly;
-static struct notifier_block lan966x_switchdev_nb __read_mostly;
-static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly;
static void lan966x_port_set_mcast_ip_flood(struct lan966x_port *port,
u32 pgid_ip)
@@ -132,7 +130,7 @@ static int lan966x_port_pre_bridge_flags(struct lan966x_port *port,
return 0;
}
-static void lan966x_update_fwd_mask(struct lan966x *lan966x)
+void lan966x_update_fwd_mask(struct lan966x *lan966x)
{
int i;
@@ -140,9 +138,14 @@ static void lan966x_update_fwd_mask(struct lan966x *lan966x)
struct lan966x_port *port = lan966x->ports[i];
unsigned long mask = 0;
- if (port && lan966x->bridge_fwd_mask & BIT(i))
+ if (port && lan966x->bridge_fwd_mask & BIT(i)) {
mask = lan966x->bridge_fwd_mask & ~BIT(i);
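+ /* A port does not forward frames to the other ports that are
+ * members of the same bond.
+ */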
+ if (port->bond)
+ mask &= ~lan966x_lag_get_mask(lan966x,
+ port->bond);
+ }
+
mask |= BIT(CPU_PORT);
lan_wr(ANA_PGID_PGID_SET(mask),
@@ -150,7 +153,7 @@ static void lan966x_update_fwd_mask(struct lan966x *lan966x)
}
}
-static void lan966x_port_stp_state_set(struct lan966x_port *port, u8 state)
+void lan966x_port_stp_state_set(struct lan966x_port *port, u8 state)
{
struct lan966x *lan966x = port->lan966x;
bool learn_ena = false;
@@ -171,8 +174,8 @@ static void lan966x_port_stp_state_set(struct lan966x_port *port, u8 state)
lan966x_update_fwd_mask(lan966x);
}
-static void lan966x_port_ageing_set(struct lan966x_port *port,
- unsigned long ageing_clock_t)
+void lan966x_port_ageing_set(struct lan966x_port *port,
+ unsigned long ageing_clock_t)
{
unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t);
u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000;
@@ -241,6 +244,7 @@ static int lan966x_port_attr_set(struct net_device *dev, const void *ctx,
}
static int lan966x_port_bridge_join(struct lan966x_port *port,
+ struct net_device *brport_dev,
struct net_device *bridge,
struct netlink_ext_ack *extack)
{
@@ -258,7 +262,7 @@ static int lan966x_port_bridge_join(struct lan966x_port *port,
}
}
- err = switchdev_bridge_port_offload(dev, dev, port,
+ err = switchdev_bridge_port_offload(brport_dev, dev, port,
&lan966x_switchdev_nb,
&lan966x_switchdev_blocking_nb,
false, extack);
@@ -295,8 +299,9 @@ static void lan966x_port_bridge_leave(struct lan966x_port *port,
lan966x_vlan_port_apply(port);
}
-static int lan966x_port_changeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+int lan966x_port_changeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct lan966x_port *port = netdev_priv(dev);
struct netlink_ext_ack *extack;
@@ -306,44 +311,68 @@ static int lan966x_port_changeupper(struct net_device *dev,
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking)
- err = lan966x_port_bridge_join(port, info->upper_dev,
+ err = lan966x_port_bridge_join(port, brport_dev,
+ info->upper_dev,
extack);
else
lan966x_port_bridge_leave(port, info->upper_dev);
}
+ if (netif_is_lag_master(info->upper_dev)) {
+ if (info->linking)
+ err = lan966x_lag_port_join(port, info->upper_dev,
+ info->upper_dev,
+ extack);
+ else
+ lan966x_lag_port_leave(port, info->upper_dev);
+ }
+
return err;
}
-static int lan966x_port_prechangeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+int lan966x_port_prechangeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct lan966x_port *port = netdev_priv(dev);
+ int err = NOTIFY_DONE;
- if (netif_is_bridge_master(info->upper_dev) && !info->linking)
- switchdev_bridge_port_unoffload(port->dev, port,
- NULL, NULL);
+ if (netif_is_bridge_master(info->upper_dev) && !info->linking) {
+ switchdev_bridge_port_unoffload(port->dev, port, NULL, NULL);
+ lan966x_fdb_flush_workqueue(port->lan966x);
+ }
- return NOTIFY_DONE;
+ if (netif_is_lag_master(info->upper_dev)) {
+ err = lan966x_lag_port_prechangeupper(dev, info);
+ if (err || info->linking)
+ return err;
+
+ switchdev_bridge_port_unoffload(brport_dev, port, NULL, NULL);
+ lan966x_fdb_flush_workqueue(port->lan966x);
+ }
+
+ return err;
}
-static int lan966x_foreign_bridging_check(struct net_device *bridge,
+static int lan966x_foreign_bridging_check(struct net_device *upper,
+ bool *has_foreign,
+ bool *seen_lan966x,
struct netlink_ext_ack *extack)
{
struct lan966x *lan966x = NULL;
- bool has_foreign = false;
struct net_device *dev;
struct list_head *iter;
- if (!netif_is_bridge_master(bridge))
+ if (!netif_is_bridge_master(upper) &&
+ !netif_is_lag_master(upper))
return 0;
- netdev_for_each_lower_dev(bridge, dev, iter) {
+ netdev_for_each_lower_dev(upper, dev, iter) {
if (lan966x_netdevice_check(dev)) {
struct lan966x_port *port = netdev_priv(dev);
if (lan966x) {
- /* Bridge already has at least one port of a
+ /* Upper already has at least one port of a
* lan966x switch inside it, check that it's
* the same instance of the driver.
*/
@@ -354,15 +383,24 @@ static int lan966x_foreign_bridging_check(struct net_device *bridge,
}
} else {
/* This is the first lan966x port inside this
- * bridge
+ * upper device
*/
lan966x = port->lan966x;
+ *seen_lan966x = true;
}
+ } else if (netif_is_lag_master(dev)) {
+ /* Allow bond interfaces that contain only lan966x
+ * devices
+ */
+ if (lan966x_foreign_bridging_check(dev, has_foreign,
+ seen_lan966x,
+ extack))
+ return -EINVAL;
} else {
- has_foreign = true;
+ *has_foreign = true;
}
- if (lan966x && has_foreign) {
+ if (*seen_lan966x && *has_foreign) {
NL_SET_ERR_MSG_MOD(extack,
"Bridging lan966x ports with foreign interfaces disallowed");
return -EINVAL;
@@ -375,7 +413,12 @@ static int lan966x_foreign_bridging_check(struct net_device *bridge,
static int lan966x_bridge_check(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
+ bool has_foreign = false;
+ bool seen_lan966x = false;
+
return lan966x_foreign_bridging_check(info->upper_dev,
+ &has_foreign,
+ &seen_lan966x,
info->info.extack);
}
@@ -386,21 +429,44 @@ static int lan966x_netdevice_port_event(struct net_device *dev,
int err = 0;
if (!lan966x_netdevice_check(dev)) {
- if (event == NETDEV_CHANGEUPPER)
- return lan966x_bridge_check(dev, ptr);
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ case NETDEV_PRECHANGEUPPER:
+ err = lan966x_bridge_check(dev, ptr);
+ if (err)
+ return err;
+
+ if (netif_is_lag_master(dev)) {
+ if (event == NETDEV_CHANGEUPPER)
+ err = lan966x_lag_netdev_changeupper(dev,
+ ptr);
+ else
+ err = lan966x_lag_netdev_prechangeupper(dev,
+ ptr);
+
+ return err;
+ }
+ break;
+ default:
+ return 0;
+ }
+
return 0;
}
switch (event) {
case NETDEV_PRECHANGEUPPER:
- err = lan966x_port_prechangeupper(dev, ptr);
+ err = lan966x_port_prechangeupper(dev, dev, ptr);
break;
case NETDEV_CHANGEUPPER:
err = lan966x_bridge_check(dev, ptr);
if (err)
return err;
- err = lan966x_port_changeupper(dev, ptr);
+ err = lan966x_port_changeupper(dev, dev, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ err = lan966x_lag_port_changelowerstate(dev, ptr);
break;
}
@@ -418,19 +484,23 @@ static int lan966x_netdevice_event(struct notifier_block *nb,
return notifier_from_errno(ret);
}
-/* We don't offload uppers such as LAG as bridge ports, so every device except
- * the bridge itself is foreign.
- */
static bool lan966x_foreign_dev_check(const struct net_device *dev,
const struct net_device *foreign_dev)
{
struct lan966x_port *port = netdev_priv(dev);
struct lan966x *lan966x = port->lan966x;
+ int i;
if (netif_is_bridge_master(foreign_dev))
if (lan966x->bridge == foreign_dev)
return false;
+ if (netif_is_lag_master(foreign_dev))
+ for (i = 0; i < lan966x->num_phys_ports; ++i)
+ if (lan966x->ports[i] &&
+ lan966x->ports[i]->bond == foreign_dev)
+ return false;
+
return true;
}
@@ -571,11 +641,11 @@ static struct notifier_block lan966x_netdevice_nb __read_mostly = {
.notifier_call = lan966x_netdevice_event,
};
-static struct notifier_block lan966x_switchdev_nb __read_mostly = {
+struct notifier_block lan966x_switchdev_nb __read_mostly = {
.notifier_call = lan966x_switchdev_event,
};
-static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly = {
+struct notifier_block lan966x_switchdev_blocking_nb __read_mostly = {
.notifier_call = lan966x_switchdev_blocking_event,
};
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 306026e6aa11..dddaffdaad9a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2064,6 +2064,16 @@ static int ocelot_bond_get_id(struct ocelot *ocelot, struct net_device *bond)
return __ffs(bond_mask);
}
+/* Returns the mask of user ports assigned to this DSA tag_8021q CPU port.
+ * Note that when CPU ports are in a LAG, the user ports are assigned to the
+ * 'primary' CPU port, the one whose physical port number gives the logical
+ * port number of the LAG.
+ *
+ * We leave PGID_SRC poorly configured for the 'secondary' CPU port in the LAG
+ * (to which no user port is assigned), but it appears that forwarding from
+ * this secondary CPU port looks at the PGID_SRC associated with the logical
+ * port ID that it's assigned to, which *is* configured properly.
+ */
static u32 ocelot_dsa_8021q_cpu_assigned_ports(struct ocelot *ocelot,
struct ocelot_port *cpu)
{
@@ -2080,9 +2090,15 @@ static u32 ocelot_dsa_8021q_cpu_assigned_ports(struct ocelot *ocelot,
mask |= BIT(port);
}
+ if (cpu->bond)
+ mask &= ~ocelot_get_bond_mask(ocelot, cpu->bond);
+
return mask;
}
+/* Returns the DSA tag_8021q CPU port that the given port is assigned to,
+ * or the bit mask of CPU ports if said CPU port is in a LAG.
+ */
u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
@@ -2091,6 +2107,9 @@ u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port)
if (!cpu_port)
return 0;
+ if (cpu_port->bond)
+ return ocelot_get_bond_mask(ocelot, cpu_port->bond);
+
return BIT(cpu_port->index);
}
EXPORT_SYMBOL_GPL(ocelot_port_assigned_dsa_8021q_cpu_mask);
@@ -2214,61 +2233,61 @@ static void ocelot_update_pgid_cpu(struct ocelot *ocelot)
ocelot_write_rix(ocelot, pgid_cpu, ANA_PGID_PGID, PGID_CPU);
}
-void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port,
- int cpu)
+void ocelot_port_setup_dsa_8021q_cpu(struct ocelot *ocelot, int cpu)
{
struct ocelot_port *cpu_port = ocelot->ports[cpu];
u16 vid;
mutex_lock(&ocelot->fwd_domain_lock);
- ocelot->ports[port]->dsa_8021q_cpu = cpu_port;
-
- if (!cpu_port->is_dsa_8021q_cpu) {
- cpu_port->is_dsa_8021q_cpu = true;
-
- for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
- ocelot_vlan_member_add(ocelot, cpu, vid, true);
+ cpu_port->is_dsa_8021q_cpu = true;
- ocelot_update_pgid_cpu(ocelot);
- }
+ for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
+ ocelot_vlan_member_add(ocelot, cpu, vid, true);
- ocelot_apply_bridge_fwd_mask(ocelot, true);
+ ocelot_update_pgid_cpu(ocelot);
mutex_unlock(&ocelot->fwd_domain_lock);
}
-EXPORT_SYMBOL_GPL(ocelot_port_assign_dsa_8021q_cpu);
+EXPORT_SYMBOL_GPL(ocelot_port_setup_dsa_8021q_cpu);
-void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port)
+void ocelot_port_teardown_dsa_8021q_cpu(struct ocelot *ocelot, int cpu)
{
- struct ocelot_port *cpu_port = ocelot->ports[port]->dsa_8021q_cpu;
- bool keep = false;
+ struct ocelot_port *cpu_port = ocelot->ports[cpu];
u16 vid;
- int p;
mutex_lock(&ocelot->fwd_domain_lock);
- ocelot->ports[port]->dsa_8021q_cpu = NULL;
+ cpu_port->is_dsa_8021q_cpu = false;
- for (p = 0; p < ocelot->num_phys_ports; p++) {
- if (!ocelot->ports[p])
- continue;
+ for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
+ ocelot_vlan_member_del(ocelot, cpu_port->index, vid);
- if (ocelot->ports[p]->dsa_8021q_cpu == cpu_port) {
- keep = true;
- break;
- }
- }
+ ocelot_update_pgid_cpu(ocelot);
- if (!keep) {
- cpu_port->is_dsa_8021q_cpu = false;
+ mutex_unlock(&ocelot->fwd_domain_lock);
+}
+EXPORT_SYMBOL_GPL(ocelot_port_teardown_dsa_8021q_cpu);
+
+void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port,
+ int cpu)
+{
+ struct ocelot_port *cpu_port = ocelot->ports[cpu];
- for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++)
- ocelot_vlan_member_del(ocelot, cpu_port->index, vid);
+ mutex_lock(&ocelot->fwd_domain_lock);
- ocelot_update_pgid_cpu(ocelot);
- }
+ ocelot->ports[port]->dsa_8021q_cpu = cpu_port;
+ ocelot_apply_bridge_fwd_mask(ocelot, true);
+
+ mutex_unlock(&ocelot->fwd_domain_lock);
+}
+EXPORT_SYMBOL_GPL(ocelot_port_assign_dsa_8021q_cpu);
+void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port)
+{
+ mutex_lock(&ocelot->fwd_domain_lock);
+
+ ocelot->ports[port]->dsa_8021q_cpu = NULL;
ocelot_apply_bridge_fwd_mask(ocelot, true);
mutex_unlock(&ocelot->fwd_domain_lock);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 83c97154c0c7..3ab3e4536b99 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1301,9 +1301,14 @@ static bool offload_pre_check(struct flow_cls_offload *flow)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
struct flow_dissector *dissector = rule->match.dissector;
+ struct flow_match_ct ct;
- if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT))
- return false;
+ if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) {
+ flow_rule_match_ct(rule, &ct);
+ /* Allow special case where CT match is all 0 */
+ if (memchr_inv(ct.key, 0, sizeof(*ct.key)))
+ return false;
+ }
if (flow->common.chain_index)
return false;
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index 8da4b66b71b5..36d3826762ba 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -51,20 +51,20 @@ enum mac_version {
RTL_GIGA_MAC_VER_38,
RTL_GIGA_MAC_VER_39,
RTL_GIGA_MAC_VER_40,
- RTL_GIGA_MAC_VER_41,
+ /* support for RTL_GIGA_MAC_VER_41 has been removed */
RTL_GIGA_MAC_VER_42,
RTL_GIGA_MAC_VER_43,
RTL_GIGA_MAC_VER_44,
- RTL_GIGA_MAC_VER_45,
+ /* support for RTL_GIGA_MAC_VER_45 has been removed */
RTL_GIGA_MAC_VER_46,
- RTL_GIGA_MAC_VER_47,
+ /* support for RTL_GIGA_MAC_VER_47 has been removed */
RTL_GIGA_MAC_VER_48,
- RTL_GIGA_MAC_VER_49,
- RTL_GIGA_MAC_VER_50,
+ /* support for RTL_GIGA_MAC_VER_49 has been removed */
+ /* support for RTL_GIGA_MAC_VER_50 has been removed */
RTL_GIGA_MAC_VER_51,
RTL_GIGA_MAC_VER_52,
RTL_GIGA_MAC_VER_53,
- RTL_GIGA_MAC_VER_60,
+ /* support for RTL_GIGA_MAC_VER_60 has been removed */
RTL_GIGA_MAC_VER_61,
RTL_GIGA_MAC_VER_63,
RTL_GIGA_MAC_NONE
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 1b7fdb4f056b..4e0fae8dbf7c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -49,10 +49,8 @@
#define FIRMWARE_8106E_2 "rtl_nic/rtl8106e-2.fw"
#define FIRMWARE_8168G_2 "rtl_nic/rtl8168g-2.fw"
#define FIRMWARE_8168G_3 "rtl_nic/rtl8168g-3.fw"
-#define FIRMWARE_8168H_1 "rtl_nic/rtl8168h-1.fw"
#define FIRMWARE_8168H_2 "rtl_nic/rtl8168h-2.fw"
#define FIRMWARE_8168FP_3 "rtl_nic/rtl8168fp-3.fw"
-#define FIRMWARE_8107E_1 "rtl_nic/rtl8107e-1.fw"
#define FIRMWARE_8107E_2 "rtl_nic/rtl8107e-2.fw"
#define FIRMWARE_8125A_3 "rtl_nic/rtl8125a-3.fw"
#define FIRMWARE_8125B_2 "rtl_nic/rtl8125b-2.fw"
@@ -131,20 +129,14 @@ static const struct {
[RTL_GIGA_MAC_VER_38] = {"RTL8411", FIRMWARE_8411_1 },
[RTL_GIGA_MAC_VER_39] = {"RTL8106e", FIRMWARE_8106E_1},
[RTL_GIGA_MAC_VER_40] = {"RTL8168g/8111g", FIRMWARE_8168G_2},
- [RTL_GIGA_MAC_VER_41] = {"RTL8168g/8111g" },
[RTL_GIGA_MAC_VER_42] = {"RTL8168gu/8111gu", FIRMWARE_8168G_3},
[RTL_GIGA_MAC_VER_43] = {"RTL8106eus", FIRMWARE_8106E_2},
[RTL_GIGA_MAC_VER_44] = {"RTL8411b", FIRMWARE_8411_2 },
- [RTL_GIGA_MAC_VER_45] = {"RTL8168h/8111h", FIRMWARE_8168H_1},
[RTL_GIGA_MAC_VER_46] = {"RTL8168h/8111h", FIRMWARE_8168H_2},
- [RTL_GIGA_MAC_VER_47] = {"RTL8107e", FIRMWARE_8107E_1},
[RTL_GIGA_MAC_VER_48] = {"RTL8107e", FIRMWARE_8107E_2},
- [RTL_GIGA_MAC_VER_49] = {"RTL8168ep/8111ep" },
- [RTL_GIGA_MAC_VER_50] = {"RTL8168ep/8111ep" },
[RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" },
[RTL_GIGA_MAC_VER_52] = {"RTL8168fp/RTL8117", FIRMWARE_8168FP_3},
[RTL_GIGA_MAC_VER_53] = {"RTL8168fp/RTL8117", },
- [RTL_GIGA_MAC_VER_60] = {"RTL8125A" },
[RTL_GIGA_MAC_VER_61] = {"RTL8125A", FIRMWARE_8125A_3},
/* reserve 62 for CFG_METHOD_4 in the vendor driver */
[RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2},
@@ -658,10 +650,8 @@ MODULE_FIRMWARE(FIRMWARE_8106E_1);
MODULE_FIRMWARE(FIRMWARE_8106E_2);
MODULE_FIRMWARE(FIRMWARE_8168G_2);
MODULE_FIRMWARE(FIRMWARE_8168G_3);
-MODULE_FIRMWARE(FIRMWARE_8168H_1);
MODULE_FIRMWARE(FIRMWARE_8168H_2);
MODULE_FIRMWARE(FIRMWARE_8168FP_3);
-MODULE_FIRMWARE(FIRMWARE_8107E_1);
MODULE_FIRMWARE(FIRMWARE_8107E_2);
MODULE_FIRMWARE(FIRMWARE_8125A_3);
MODULE_FIRMWARE(FIRMWARE_8125B_2);
@@ -689,7 +679,7 @@ static void rtl_pci_commit(struct rtl8169_private *tp)
static bool rtl_is_8125(struct rtl8169_private *tp)
{
- return tp->mac_version >= RTL_GIGA_MAC_VER_60;
+ return tp->mac_version >= RTL_GIGA_MAC_VER_61;
}
static bool rtl_is_8168evl_up(struct rtl8169_private *tp)
@@ -892,8 +882,6 @@ static void rtl8168g_phy_suspend_quirk(struct rtl8169_private *tp, int value)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- case RTL_GIGA_MAC_VER_49:
if (value & BMCR_RESET || !(value & BMCR_PDOWN))
rtl_eri_set_bits(tp, 0x1a8, 0xfc000000);
else
@@ -1207,7 +1195,7 @@ static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE;
- case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_53:
+ case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE;
default:
return RTL_DASH_NONE;
@@ -2088,8 +2076,6 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii)
if (ver != RTL_GIGA_MAC_NONE && !gmii) {
if (ver == RTL_GIGA_MAC_VER_42)
ver = RTL_GIGA_MAC_VER_43;
- else if (ver == RTL_GIGA_MAC_VER_45)
- ver = RTL_GIGA_MAC_VER_47;
else if (ver == RTL_GIGA_MAC_VER_46)
ver = RTL_GIGA_MAC_VER_48;
}
@@ -2271,7 +2257,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
break;
- case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST);
break;
default:
@@ -2455,7 +2441,7 @@ static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp)
rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42);
rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42);
break;
- case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
+ case RTL_GIGA_MAC_VER_61:
rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42);
break;
case RTL_GIGA_MAC_VER_63:
@@ -2700,8 +2686,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
/* reset ephy tx/rx disable timer */
r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0);
/* chip can trigger L1.2 */
@@ -2712,8 +2698,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
}
} else {
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48:
- case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0);
break;
default:
@@ -3288,45 +3274,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
rtl_pcie_state_l2l3_disable(tp);
}
-static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
-{
- static const struct ephy_info e_info_8168ep_1[] = {
- { 0x00, 0xffff, 0x10ab },
- { 0x06, 0xffff, 0xf030 },
- { 0x08, 0xffff, 0x2006 },
- { 0x0d, 0xffff, 0x1666 },
- { 0x0c, 0x3ff0, 0x0000 }
- };
-
- /* disable aspm and clock request before access ephy */
- rtl_hw_aspm_clkreq_enable(tp, false);
- rtl_ephy_init(tp, e_info_8168ep_1);
-
- rtl_hw_start_8168ep(tp);
-
- rtl_hw_aspm_clkreq_enable(tp, true);
-}
-
-static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
-{
- static const struct ephy_info e_info_8168ep_2[] = {
- { 0x00, 0xffff, 0x10a3 },
- { 0x19, 0xffff, 0xfc00 },
- { 0x1e, 0xffff, 0x20ea }
- };
-
- /* disable aspm and clock request before access ephy */
- rtl_hw_aspm_clkreq_enable(tp, false);
- rtl_ephy_init(tp, e_info_8168ep_2);
-
- rtl_hw_start_8168ep(tp);
-
- RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
- RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
-
- rtl_hw_aspm_clkreq_enable(tp, true);
-}
-
static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8168ep_3[] = {
@@ -3625,46 +3572,6 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
udelay(10);
}
-static void rtl_hw_start_8125a_1(struct rtl8169_private *tp)
-{
- static const struct ephy_info e_info_8125a_1[] = {
- { 0x01, 0xffff, 0xa812 },
- { 0x09, 0xffff, 0x520c },
- { 0x04, 0xffff, 0xd000 },
- { 0x0d, 0xffff, 0xf702 },
- { 0x0a, 0xffff, 0x8653 },
- { 0x06, 0xffff, 0x001e },
- { 0x08, 0xffff, 0x3595 },
- { 0x20, 0xffff, 0x9455 },
- { 0x21, 0xffff, 0x99ff },
- { 0x02, 0xffff, 0x6046 },
- { 0x29, 0xffff, 0xfe00 },
- { 0x23, 0xffff, 0xab62 },
-
- { 0x41, 0xffff, 0xa80c },
- { 0x49, 0xffff, 0x520c },
- { 0x44, 0xffff, 0xd000 },
- { 0x4d, 0xffff, 0xf702 },
- { 0x4a, 0xffff, 0x8653 },
- { 0x46, 0xffff, 0x001e },
- { 0x48, 0xffff, 0x3595 },
- { 0x60, 0xffff, 0x9455 },
- { 0x61, 0xffff, 0x99ff },
- { 0x42, 0xffff, 0x6046 },
- { 0x69, 0xffff, 0xfe00 },
- { 0x63, 0xffff, 0xab62 },
- };
-
- rtl_set_def_aspm_entry_latency(tp);
-
- /* disable aspm and clock request before access ephy */
- rtl_hw_aspm_clkreq_enable(tp, false);
- rtl_ephy_init(tp, e_info_8125a_1);
-
- rtl_hw_start_8125_common(tp);
- rtl_hw_aspm_clkreq_enable(tp, true);
-}
-
static void rtl_hw_start_8125a_2(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8125a_2[] = {
@@ -3748,20 +3655,14 @@ static void rtl_hw_config(struct rtl8169_private *tp)
[RTL_GIGA_MAC_VER_38] = rtl_hw_start_8411,
[RTL_GIGA_MAC_VER_39] = rtl_hw_start_8106,
[RTL_GIGA_MAC_VER_40] = rtl_hw_start_8168g_1,
- [RTL_GIGA_MAC_VER_41] = rtl_hw_start_8168g_1,
[RTL_GIGA_MAC_VER_42] = rtl_hw_start_8168g_2,
[RTL_GIGA_MAC_VER_43] = rtl_hw_start_8168g_2,
[RTL_GIGA_MAC_VER_44] = rtl_hw_start_8411_2,
- [RTL_GIGA_MAC_VER_45] = rtl_hw_start_8168h_1,
[RTL_GIGA_MAC_VER_46] = rtl_hw_start_8168h_1,
- [RTL_GIGA_MAC_VER_47] = rtl_hw_start_8168h_1,
[RTL_GIGA_MAC_VER_48] = rtl_hw_start_8168h_1,
- [RTL_GIGA_MAC_VER_49] = rtl_hw_start_8168ep_1,
- [RTL_GIGA_MAC_VER_50] = rtl_hw_start_8168ep_2,
[RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3,
[RTL_GIGA_MAC_VER_52] = rtl_hw_start_8117,
[RTL_GIGA_MAC_VER_53] = rtl_hw_start_8117,
- [RTL_GIGA_MAC_VER_60] = rtl_hw_start_8125a_1,
[RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2,
[RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b,
};
@@ -4156,7 +4057,6 @@ static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp,
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_34:
- case RTL_GIGA_MAC_VER_60:
case RTL_GIGA_MAC_VER_61:
case RTL_GIGA_MAC_VER_63:
padto = max_t(unsigned int, padto, ETH_ZLEN);
@@ -5194,13 +5094,13 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp)
static void rtl_hw_initialize(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_53:
+ case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
rtl8168ep_stop_cmac(tp);
fallthrough;
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
rtl_hw_init_8168g(tp);
break;
- case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+ case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
rtl_hw_init_8125(tp);
break;
default:
@@ -5291,7 +5191,7 @@ done:
/* register is set if system vendor successfully tested ASPM 1.2 */
static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
{
- if (tp->mac_version >= RTL_GIGA_MAC_VER_60 &&
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_61 &&
r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
return true;
@@ -5378,7 +5278,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
if (rtl_aspm_is_safe(tp))
rc = 0;
- else if (tp->mac_version >= RTL_GIGA_MAC_VER_45)
+ else if (tp->mac_version >= RTL_GIGA_MAC_VER_46)
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
else
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
index 15c295f90196..8c04cc56b9da 100644
--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -793,71 +793,6 @@ static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp,
rtl8168g_config_eee_phy(phydev);
}
-static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp,
- struct phy_device *phydev)
-{
- u16 dout_tapbin;
- u32 data;
-
- r8169_apply_firmware(tp);
-
- /* CHN EST parameters adjust - giga master */
- r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000);
- r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000);
- r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500);
- r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00);
-
- /* CHN EST parameters adjust - giga slave */
- r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000);
- r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000);
- r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000);
-
- /* CHN EST parameters adjust - fnet */
- r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200);
- r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500);
- r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00);
-
- /* enable R-tune & PGA-retune function */
- dout_tapbin = 0;
- data = phy_read_paged(phydev, 0x0a46, 0x13);
- data &= 3;
- data <<= 2;
- dout_tapbin |= data;
- data = phy_read_paged(phydev, 0x0a46, 0x12);
- data &= 0xc000;
- data >>= 14;
- dout_tapbin |= data;
- dout_tapbin = ~(dout_tapbin ^ 0x08);
- dout_tapbin <<= 12;
- dout_tapbin &= 0xf000;
-
- r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800);
- phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002);
-
- rtl8168g_enable_gphy_10m(phydev);
-
- /* SAR ADC performance */
- phy_modify_paged(phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14));
-
- r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000);
-
- /* disable phy pfm mode */
- phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0);
-
- rtl8168g_disable_aldps(phydev);
- rtl8168h_config_eee_phy(phydev);
-}
-
static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp,
struct phy_device *phydev)
{
@@ -895,27 +830,6 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp,
rtl8168g_config_eee_phy(phydev);
}
-static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp,
- struct phy_device *phydev)
-{
- /* Enable PHY auto speed down */
- phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
-
- rtl8168g_phy_adjust_10m_aldps(phydev);
-
- /* Enable EEE auto-fallback function */
- phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2));
-
- /* Enable UC LPF tune function */
- r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000);
-
- /* set rg_sel_sdm_rate */
- phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14));
-
- rtl8168g_disable_aldps(phydev);
- rtl8168g_config_eee_phy(phydev);
-}
-
static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp,
struct phy_device *phydev)
{
@@ -1081,44 +995,6 @@ static void rtl8125_legacy_force_mode(struct phy_device *phydev)
phy_modify_paged(phydev, 0xa5b, 0x12, BIT(15), 0);
}
-static void rtl8125a_1_hw_phy_config(struct rtl8169_private *tp,
- struct phy_device *phydev)
-{
- phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084);
- phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
- phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006);
- phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
- phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100);
- phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000);
- phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400);
- phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff);
- phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff);
-
- r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400);
- r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300);
- r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00);
- r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000);
- r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500);
- r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000);
- r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300);
- r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000);
- r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000);
- r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500);
- r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00);
- r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100);
- r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000);
-
- phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038);
- r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6);
-
- phy_write_paged(phydev, 0xbc3, 0x12, 0x5555);
- phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00);
- phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000);
- rtl8168g_enable_gphy_10m(phydev);
-
- rtl8125a_config_eee_phy(phydev);
-}
-
static void rtl8125a_2_hw_phy_config(struct rtl8169_private *tp,
struct phy_device *phydev)
{
@@ -1266,20 +1142,14 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
[RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config,
[RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config,
[RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_41] = NULL,
[RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config,
[RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config,
[RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config,
[RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config,
[RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
[RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
[RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config,
[RTL_GIGA_MAC_VER_53] = rtl8117_hw_phy_config,
- [RTL_GIGA_MAC_VER_60] = rtl8125a_1_hw_phy_config,
[RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config,
[RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 4f2b82a884b9..7d3c7ca7caf4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -610,7 +610,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->int_snapshot_num = AUX_SNAPSHOT1;
plat->ext_snapshot_num = AUX_SNAPSHOT0;
- plat->has_crossts = true;
plat->crosststamp = intel_crosststamp;
plat->int_snapshot_en = 0;
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index fb30bc5d56cb..fce06663e1e1 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -33,6 +33,7 @@ config TI_DAVINCI_MDIO
tristate "TI DaVinci MDIO Support"
depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
select PHYLIB
+ select MDIO_BITBANG
help
This driver supports TI's DaVinci MDIO module.
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f4a6b590a1e3..7ef5d8208a4e 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -74,6 +74,9 @@
#define AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG 0x318
#define AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2 0x31C
+#define AM65_CPSW_SGMII_CONTROL_REG 0x010
+#define AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE BIT(0)
+
#define AM65_CPSW_CTL_VLAN_AWARE BIT(1)
#define AM65_CPSW_CTL_P0_ENABLE BIT(2)
#define AM65_CPSW_CTL_P0_TX_CRC_REMOVE BIT(13)
@@ -590,11 +593,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
/* mac_sl should be configured via phy-link interface */
am65_cpsw_sl_ctl_reset(port);
- ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET,
- port->slave.phy_if);
- if (ret)
- goto error_cleanup;
-
ret = phylink_of_phy_connect(port->slave.phylink, port->slave.phy_node, 0);
if (ret)
goto error_cleanup;
@@ -1409,7 +1407,14 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = {
static void am65_cpsw_nuss_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state)
{
- /* Currently not used */
+ struct am65_cpsw_slave_data *slave = container_of(config, struct am65_cpsw_slave_data,
+ phylink_config);
+ struct am65_cpsw_port *port = container_of(slave, struct am65_cpsw_port, slave);
+ struct am65_cpsw_common *common = port->common;
+
+ if (common->pdata.extra_modes & BIT(state->interface))
+ writel(AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE,
+ port->sgmii_base + AM65_CPSW_SGMII_CONTROL_REG);
}
static void am65_cpsw_nuss_mac_link_down(struct phylink_config *config, unsigned int mode,
@@ -1847,6 +1852,8 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
port->common = common;
port->port_base = common->cpsw_base + AM65_CPSW_NU_PORTS_BASE +
AM65_CPSW_NU_PORTS_OFFSET * (port_id);
+ if (common->pdata.extra_modes)
+ port->sgmii_base = common->ss_base + AM65_CPSW_SGMII_BASE * (port_id);
port->stat_base = common->cpsw_base + AM65_CPSW_NU_STATS_BASE +
(AM65_CPSW_NU_STATS_PORT_OFFSET * port_id);
port->name = of_get_property(port_np, "label", NULL);
@@ -1886,6 +1893,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
goto of_node_put;
}
+ ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET, port->slave.phy_if);
+ if (ret)
+ goto of_node_put;
+
ret = of_get_mac_address(port_np, port->slave.mac_addr);
if (ret) {
am65_cpsw_am654_get_efuse_macid(port_np,
@@ -1981,7 +1992,18 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
port->slave.phylink_config.type = PHYLINK_NETDEV;
port->slave.phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
- phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+ if (phy_interface_mode_is_rgmii(port->slave.phy_if)) {
+ phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+ } else if (port->slave.phy_if == PHY_INTERFACE_MODE_RMII) {
+ __set_bit(PHY_INTERFACE_MODE_RMII,
+ port->slave.phylink_config.supported_interfaces);
+ } else if (common->pdata.extra_modes & BIT(port->slave.phy_if)) {
+ __set_bit(PHY_INTERFACE_MODE_QSGMII,
+ port->slave.phylink_config.supported_interfaces);
+ } else {
+ dev_err(dev, "selected phy-mode is not supported\n");
+ return -EOPNOTSUPP;
+ }
phylink = phylink_create(&port->slave.phylink_config,
of_node_to_fwnode(port->slave.phy_node),
@@ -2611,10 +2633,18 @@ static const struct am65_cpsw_pdata am64x_cpswxg_pdata = {
.fdqring_mode = K3_RINGACC_RING_MODE_RING,
};
+static const struct am65_cpsw_pdata j7200_cpswxg_pdata = {
+ .quirks = 0,
+ .ale_dev_id = "am64-cpswxg",
+ .fdqring_mode = K3_RINGACC_RING_MODE_RING,
+ .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII),
+};
+
static const struct of_device_id am65_cpsw_nuss_of_mtable[] = {
{ .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0},
{ .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata},
{ .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata},
+ { .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata},
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index ac945631bf2f..2c9850fdfcb6 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -46,6 +46,7 @@ struct am65_cpsw_port {
const char *name;
u32 port_id;
void __iomem *port_base;
+ void __iomem *sgmii_base;
void __iomem *stat_base;
void __iomem *fetch_ram_base;
bool disabled;
@@ -88,6 +89,7 @@ struct am65_cpsw_rx_chn {
struct am65_cpsw_pdata {
u32 quirks;
+ u64 extra_modes;
enum k3_ring_mode fdqring_mode;
const char *ale_dev_id;
};
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index ea3772618043..946b9753ccfb 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -26,6 +26,8 @@
#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/pinctrl/consumer.h>
+#include <linux/mdio-bitbang.h>
+#include <linux/sys_soc.h>
/*
* This timeout definition is a worst-case ultra defensive measure against
@@ -41,6 +43,7 @@
struct davinci_mdio_of_param {
int autosuspend_delay_ms;
+ bool manual_mode;
};
struct davinci_mdio_regs {
@@ -49,6 +52,15 @@ struct davinci_mdio_regs {
#define CONTROL_IDLE BIT(31)
#define CONTROL_ENABLE BIT(30)
#define CONTROL_MAX_DIV (0xffff)
+#define CONTROL_CLKDIV GENMASK(15, 0)
+
+#define MDIO_MAN_MDCLK_O BIT(2)
+#define MDIO_MAN_OE BIT(1)
+#define MDIO_MAN_PIN BIT(0)
+#define MDIO_MANUALMODE BIT(31)
+
+#define MDIO_PIN 0
+
u32 alive;
u32 link;
@@ -59,7 +71,9 @@ struct davinci_mdio_regs {
u32 userintmasked;
u32 userintmaskset;
u32 userintmaskclr;
- u32 __reserved_1[20];
+ u32 manualif;
+ u32 poll;
+ u32 __reserved_1[18];
struct {
u32 access;
@@ -79,6 +93,7 @@ static const struct mdio_platform_data default_pdata = {
struct davinci_mdio_data {
struct mdio_platform_data pdata;
+ struct mdiobb_ctrl bb_ctrl;
struct davinci_mdio_regs __iomem *regs;
struct clk *clk;
struct device *dev;
@@ -90,6 +105,7 @@ struct davinci_mdio_data {
*/
bool skip_scan;
u32 clk_div;
+ bool manual_mode;
};
static void davinci_mdio_init_clk(struct davinci_mdio_data *data)
@@ -128,9 +144,122 @@ static void davinci_mdio_enable(struct davinci_mdio_data *data)
writel(data->clk_div | CONTROL_ENABLE, &data->regs->control);
}
-static int davinci_mdio_reset(struct mii_bus *bus)
+static void davinci_mdio_disable(struct davinci_mdio_data *data)
+{
+ u32 reg;
+
+ /* Disable MDIO state machine */
+ reg = readl(&data->regs->control);
+
+ reg &= ~CONTROL_CLKDIV;
+ reg |= data->clk_div;
+
+ reg &= ~CONTROL_ENABLE;
+ writel(reg, &data->regs->control);
+}
+
+static void davinci_mdio_enable_manual_mode(struct davinci_mdio_data *data)
+{
+ u32 reg;
+ /* set manual mode */
+ reg = readl(&data->regs->poll);
+ reg |= MDIO_MANUALMODE;
+ writel(reg, &data->regs->poll);
+}
+
+static void davinci_set_mdc(struct mdiobb_ctrl *ctrl, int level)
+{
+ struct davinci_mdio_data *data;
+ u32 reg;
+
+ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
+ reg = readl(&data->regs->manualif);
+
+ if (level)
+ reg |= MDIO_MAN_MDCLK_O;
+ else
+ reg &= ~MDIO_MAN_MDCLK_O;
+
+ writel(reg, &data->regs->manualif);
+}
+
+static void davinci_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output)
+{
+ struct davinci_mdio_data *data;
+ u32 reg;
+
+ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
+ reg = readl(&data->regs->manualif);
+
+ if (output)
+ reg |= MDIO_MAN_OE;
+ else
+ reg &= ~MDIO_MAN_OE;
+
+ writel(reg, &data->regs->manualif);
+}
+
+static void davinci_set_mdio_data(struct mdiobb_ctrl *ctrl, int value)
+{
+ struct davinci_mdio_data *data;
+ u32 reg;
+
+ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
+ reg = readl(&data->regs->manualif);
+
+ if (value)
+ reg |= MDIO_MAN_PIN;
+ else
+ reg &= ~MDIO_MAN_PIN;
+
+ writel(reg, &data->regs->manualif);
+}
+
+static int davinci_get_mdio_data(struct mdiobb_ctrl *ctrl)
+{
+ struct davinci_mdio_data *data;
+ unsigned long reg;
+
+ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
+ reg = readl(&data->regs->manualif);
+ return test_bit(MDIO_PIN, &reg);
+}
+
+static int davinci_mdiobb_read(struct mii_bus *bus, int phy, int reg)
+{
+ int ret;
+
+ ret = pm_runtime_resume_and_get(bus->parent);
+ if (ret < 0)
+ return ret;
+
+ ret = mdiobb_read(bus, phy, reg);
+
+ pm_runtime_mark_last_busy(bus->parent);
+ pm_runtime_put_autosuspend(bus->parent);
+
+ return ret;
+}
+
+static int davinci_mdiobb_write(struct mii_bus *bus, int phy, int reg,
+ u16 val)
+{
+ int ret;
+
+ ret = pm_runtime_resume_and_get(bus->parent);
+ if (ret < 0)
+ return ret;
+
+ ret = mdiobb_write(bus, phy, reg, val);
+
+ pm_runtime_mark_last_busy(bus->parent);
+ pm_runtime_put_autosuspend(bus->parent);
+
+ return ret;
+}
+
+static int davinci_mdio_common_reset(struct davinci_mdio_data *data)
{
- struct davinci_mdio_data *data = bus->priv;
u32 phy_mask, ver;
int ret;
@@ -138,6 +267,11 @@ static int davinci_mdio_reset(struct mii_bus *bus)
if (ret < 0)
return ret;
+ if (data->manual_mode) {
+ davinci_mdio_disable(data);
+ davinci_mdio_enable_manual_mode(data);
+ }
+
/* wait for scan logic to settle */
msleep(PHY_MAX_ADDR * data->access_time);
@@ -171,6 +305,23 @@ done:
return 0;
}
+static int davinci_mdio_reset(struct mii_bus *bus)
+{
+ struct davinci_mdio_data *data = bus->priv;
+
+ return davinci_mdio_common_reset(data);
+}
+
+static int davinci_mdiobb_reset(struct mii_bus *bus)
+{
+ struct mdiobb_ctrl *ctrl = bus->priv;
+ struct davinci_mdio_data *data;
+
+ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
+
+ return davinci_mdio_common_reset(data);
+}
+
/* wait until hardware is ready for another user access */
static inline int wait_for_user_access(struct davinci_mdio_data *data)
{
@@ -318,6 +469,28 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
return 0;
}
+struct k3_mdio_soc_data {
+ bool manual_mode;
+};
+
+static const struct k3_mdio_soc_data am65_mdio_soc_data = {
+ .manual_mode = true,
+};
+
+static const struct soc_device_attribute k3_mdio_socinfo[] = {
+ { .family = "AM62X", .revision = "SR1.0", .data = &am65_mdio_soc_data },
+ { .family = "AM64X", .revision = "SR1.0", .data = &am65_mdio_soc_data },
+ { .family = "AM64X", .revision = "SR2.0", .data = &am65_mdio_soc_data },
+ { .family = "AM65X", .revision = "SR1.0", .data = &am65_mdio_soc_data },
+ { .family = "AM65X", .revision = "SR2.0", .data = &am65_mdio_soc_data },
+ { .family = "J7200", .revision = "SR1.0", .data = &am65_mdio_soc_data },
+ { .family = "J7200", .revision = "SR2.0", .data = &am65_mdio_soc_data },
+ { .family = "J721E", .revision = "SR1.0", .data = &am65_mdio_soc_data },
+ { .family = "J721E", .revision = "SR2.0", .data = &am65_mdio_soc_data },
+ { .family = "J721S2", .revision = "SR1.0", .data = &am65_mdio_soc_data},
+ { /* sentinel */ },
+};
+
#if IS_ENABLED(CONFIG_OF)
static const struct davinci_mdio_of_param of_cpsw_mdio_data = {
.autosuspend_delay_ms = 100,
@@ -331,6 +504,14 @@ static const struct of_device_id davinci_mdio_of_mtable[] = {
MODULE_DEVICE_TABLE(of, davinci_mdio_of_mtable);
#endif
+static const struct mdiobb_ops davinci_mdiobb_ops = {
+ .owner = THIS_MODULE,
+ .set_mdc = davinci_set_mdc,
+ .set_mdio_dir = davinci_set_mdio_dir,
+ .set_mdio_data = davinci_set_mdio_data,
+ .get_mdio_data = davinci_get_mdio_data,
+};
+
static int davinci_mdio_probe(struct platform_device *pdev)
{
struct mdio_platform_data *pdata = dev_get_platdata(&pdev->dev);
@@ -345,7 +526,26 @@ static int davinci_mdio_probe(struct platform_device *pdev)
if (!data)
return -ENOMEM;
- data->bus = devm_mdiobus_alloc(dev);
+ data->manual_mode = false;
+ data->bb_ctrl.ops = &davinci_mdiobb_ops;
+
+ if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
+ const struct soc_device_attribute *soc_match_data;
+
+ soc_match_data = soc_device_match(k3_mdio_socinfo);
+ if (soc_match_data && soc_match_data->data) {
+ const struct k3_mdio_soc_data *socdata =
+ soc_match_data->data;
+
+ data->manual_mode = socdata->manual_mode;
+ }
+ }
+
+ if (data->manual_mode)
+ data->bus = alloc_mdio_bitbang(&data->bb_ctrl);
+ else
+ data->bus = devm_mdiobus_alloc(dev);
+
if (!data->bus) {
dev_err(dev, "failed to alloc mii bus\n");
return -ENOMEM;
@@ -371,11 +571,20 @@ static int davinci_mdio_probe(struct platform_device *pdev)
}
data->bus->name = dev_name(dev);
- data->bus->read = davinci_mdio_read;
- data->bus->write = davinci_mdio_write;
- data->bus->reset = davinci_mdio_reset;
+
+ if (data->manual_mode) {
+ data->bus->read = davinci_mdiobb_read;
+ data->bus->write = davinci_mdiobb_write;
+ data->bus->reset = davinci_mdiobb_reset;
+
+ dev_info(dev, "Configuring MDIO in manual mode\n");
+ } else {
+ data->bus->read = davinci_mdio_read;
+ data->bus->write = davinci_mdio_write;
+ data->bus->reset = davinci_mdio_reset;
+ data->bus->priv = data;
+ }
data->bus->parent = dev;
- data->bus->priv = data;
data->clk = devm_clk_get(dev, "fck");
if (IS_ERR(data->clk)) {
@@ -433,9 +642,13 @@ static int davinci_mdio_remove(struct platform_device *pdev)
{
struct davinci_mdio_data *data = platform_get_drvdata(pdev);
- if (data->bus)
+ if (data->bus) {
mdiobus_unregister(data->bus);
+ if (data->manual_mode)
+ free_mdio_bitbang(data->bus);
+ }
+
pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_disable(&pdev->dev);
@@ -452,7 +665,9 @@ static int davinci_mdio_runtime_suspend(struct device *dev)
ctrl = readl(&data->regs->control);
ctrl &= ~CONTROL_ENABLE;
writel(ctrl, &data->regs->control);
- wait_for_idle(data);
+
+ if (!data->manual_mode)
+ wait_for_idle(data);
return 0;
}
@@ -461,7 +676,12 @@ static int davinci_mdio_runtime_resume(struct device *dev)
{
struct davinci_mdio_data *data = dev_get_drvdata(dev);
- davinci_mdio_enable(data);
+ if (data->manual_mode) {
+ davinci_mdio_disable(data);
+ davinci_mdio_enable_manual_mode(data);
+ } else {
+ davinci_mdio_enable(data);
+ }
return 0;
}
#endif
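
For context: the manual-mode path added above is built on the generic mdio-bitbang
helpers from <linux/mdio-bitbang.h>. A minimal sketch of that pattern follows; the
my_* names are hypothetical and only illustrate the shape of the API, they are not
part of this patch.

    #include <linux/device.h>
    #include <linux/module.h>
    #include <linux/mdio-bitbang.h>
    #include <linux/phy.h>

    /* Hypothetical pin callbacks: each would poke whichever controller register
     * drives the MDC/MDIO pins, much like davinci_set_mdc() above does with the
     * manualif register.
     */
    static void my_set_mdc(struct mdiobb_ctrl *ctrl, int level) { /* drive MDC */ }
    static void my_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output) { /* set MDIO direction */ }
    static void my_set_mdio_data(struct mdiobb_ctrl *ctrl, int value) { /* drive MDIO */ }
    static int my_get_mdio_data(struct mdiobb_ctrl *ctrl) { /* sample MDIO */ return 0; }

    static const struct mdiobb_ops my_mdiobb_ops = {
            .owner          = THIS_MODULE,
            .set_mdc        = my_set_mdc,
            .set_mdio_dir   = my_set_mdio_dir,
            .set_mdio_data  = my_set_mdio_data,
            .get_mdio_data  = my_get_mdio_data,
    };

    static int my_mdio_register(struct device *dev, struct mdiobb_ctrl *ctrl)
    {
            struct mii_bus *bus;

            ctrl->ops = &my_mdiobb_ops;

            /* bus->read/bus->write default to mdiobb_read()/mdiobb_write() */
            bus = alloc_mdio_bitbang(ctrl);
            if (!bus)
                    return -ENOMEM;

            bus->name = "my-bitbang-mdio";
            bus->parent = dev;
            snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));

            /* teardown order: mdiobus_unregister(bus); free_mdio_bitbang(bus); */
            return mdiobus_register(bus);
    }

The davinci driver above wraps mdiobb_read()/mdiobb_write() only to add runtime PM
around each access; a driver without that requirement can use the defaults directly.
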
diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c
index eb39a45de012..30a2f38491fe 100644
--- a/drivers/net/ethernet/vertexcom/mse102x.c
+++ b/drivers/net/ethernet/vertexcom/mse102x.c
@@ -762,6 +762,6 @@ static struct spi_driver mse102x_driver = {
module_spi_driver(mse102x_driver);
MODULE_DESCRIPTION("MSE102x Network driver");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@in-tech.com>");
+MODULE_AUTHOR("Stefan Wahren <stefan.wahren@chargebyte.com>");
MODULE_LICENSE("GPL");
MODULE_ALIAS("spi:" DRV_NAME);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 7962c37b3f14..01aa94776ce3 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -503,12 +503,9 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
off_gnv = skb_gro_offset(skb);
hlen = off_gnv + sizeof(*gh);
- gh = skb_gro_header_fast(skb, off_gnv);
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
+ gh = skb_gro_header(skb, hlen, off_gnv);
+ if (unlikely(!gh))
+ goto out;
if (gh->ver != GENEVE_VER || gh->oam)
goto out;
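
The geneve hunk above and the vxlan hunk later in this patch are the same conversion:
the skb_gro_header_fast()/skb_gro_header_hard()/skb_gro_header_slow() sequence is
collapsed into the new skb_gro_header() helper. Roughly, a tunnel ->gro_receive()
now reads as in the sketch below (my_tun_hdr and the surrounding driver are
hypothetical):

    static struct sk_buff *my_tun_gro_receive(struct sock *sk,
                                              struct list_head *head,
                                              struct sk_buff *skb)
    {
            struct sk_buff *pp = NULL;
            struct my_tun_hdr *th;          /* hypothetical tunnel header */
            unsigned int off, hlen;

            off = skb_gro_offset(skb);
            hlen = off + sizeof(*th);

            /* Single call: returns a pointer into the GRO header area, pulling
             * in more data if the fast path does not already cover hlen, and
             * NULL if the header cannot be made available.
             */
            th = skb_gro_header(skb, hlen, off);
            if (unlikely(!th))
                    return pp;

            /* ...validate th, then hand the inner packet to the next layer's
             * GRO handler, as geneve/vxlan do...
             */
            return pp;
    }
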
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index e88f783c297e..efea94c27880 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -984,7 +984,17 @@ static int nsim_dev_info_get(struct devlink *devlink,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
{
- return devlink_info_driver_name_put(req, DRV_NAME);
+ int err;
+
+ err = devlink_info_driver_name_put(req, DRV_NAME);
+ if (err)
+ return err;
+ err = devlink_info_version_stored_put_ext(req, "fw.mgmt", "10.20.30",
+ DEVLINK_INFO_VERSION_TYPE_COMPONENT);
+ if (err)
+ return err;
+ return devlink_info_version_running_put_ext(req, "fw.mgmt", "10.20.30",
+ DEVLINK_INFO_VERSION_TYPE_COMPONENT);
}
#define NSIM_DEV_FLASH_SIZE 500000
@@ -1312,8 +1322,7 @@ nsim_dev_devlink_trap_drop_counter_get(struct devlink *devlink,
static const struct devlink_ops nsim_dev_devlink_ops = {
.eswitch_mode_set = nsim_devlink_eswitch_mode_set,
.eswitch_mode_get = nsim_devlink_eswitch_mode_get,
- .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT |
- DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
+ .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
.reload_down = nsim_dev_reload_down,
.reload_up = nsim_dev_reload_up,
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 59fe356942b5..11ebd59bf2eb 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -115,6 +115,7 @@
#define AT803X_DEBUG_REG_HIB_CTRL 0x0b
#define AT803X_DEBUG_HIB_CTRL_SEL_RST_80U BIT(10)
#define AT803X_DEBUG_HIB_CTRL_EN_ANY_CHANGE BIT(13)
+#define AT803X_DEBUG_HIB_CTRL_PS_HIB_EN BIT(15)
#define AT803X_DEBUG_REG_3C 0x3C
@@ -192,6 +193,9 @@
#define AT803X_KEEP_PLL_ENABLED BIT(0)
#define AT803X_DISABLE_SMARTEEE BIT(1)
+/* disable hibernation mode */
+#define AT803X_DISABLE_HIBERNATION_MODE BIT(2)
+
/* ADC threshold */
#define QCA808X_PHY_DEBUG_ADC_THRESHOLD 0x2c80
#define QCA808X_ADC_THRESHOLD_MASK GENMASK(7, 0)
@@ -730,6 +734,9 @@ static int at803x_parse_dt(struct phy_device *phydev)
if (of_property_read_bool(node, "qca,disable-smarteee"))
priv->flags |= AT803X_DISABLE_SMARTEEE;
+ if (of_property_read_bool(node, "qca,disable-hibernation-mode"))
+ priv->flags |= AT803X_DISABLE_HIBERNATION_MODE;
+
if (!of_property_read_u32(node, "qca,smarteee-tw-us-1g", &tw)) {
if (!tw || tw > 255) {
phydev_err(phydev, "invalid qca,smarteee-tw-us-1g\n");
@@ -999,6 +1006,20 @@ static int at8031_pll_config(struct phy_device *phydev)
AT803X_DEBUG_PLL_ON, 0);
}
+static int at803x_hibernation_mode_config(struct phy_device *phydev)
+{
+ struct at803x_priv *priv = phydev->priv;
+
+ /* The default after hardware reset is hibernation mode enabled. After
+ * software reset, the value is retained.
+ */
+ if (!(priv->flags & AT803X_DISABLE_HIBERNATION_MODE))
+ return 0;
+
+ return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_HIB_CTRL,
+ AT803X_DEBUG_HIB_CTRL_PS_HIB_EN, 0);
+}
+
static int at803x_config_init(struct phy_device *phydev)
{
struct at803x_priv *priv = phydev->priv;
@@ -1051,6 +1072,10 @@ static int at803x_config_init(struct phy_device *phydev)
if (ret < 0)
return ret;
+ ret = at803x_hibernation_mode_config(phydev);
+ if (ret < 0)
+ return ret;
+
/* Ar803x extended next page bit is enabled by default. Cisco
* multigig switches read this bit and attempt to negotiate 10Gbps
* rates even if the next page bit is disabled. This is incorrect
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 31fbcdddc9ad..ad71c88c87e7 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -766,6 +766,41 @@ static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev)
return IRQ_HANDLED;
}
+static int brcm_fet_suspend(struct phy_device *phydev)
+{
+ int reg, err, err2, brcmtest;
+
+ /* We cannot use a read/modify/write here, otherwise the PHY continues
+ * to drive the LEDs, which defeats the purpose of low power mode.
+ */
+ err = phy_write(phydev, MII_BMCR, BMCR_PDOWN);
+ if (err < 0)
+ return err;
+
+ /* Enable shadow register access */
+ brcmtest = phy_read(phydev, MII_BRCM_FET_BRCMTEST);
+ if (brcmtest < 0)
+ return brcmtest;
+
+ reg = brcmtest | MII_BRCM_FET_BT_SRE;
+
+ err = phy_write(phydev, MII_BRCM_FET_BRCMTEST, reg);
+ if (err < 0)
+ return err;
+
+ /* Set standby mode */
+ err = phy_modify(phydev, MII_BRCM_FET_SHDW_AUXMODE4,
+ MII_BRCM_FET_SHDW_AM4_STANDBY,
+ MII_BRCM_FET_SHDW_AM4_STANDBY);
+
+ /* Disable shadow register access */
+ err2 = phy_write(phydev, MII_BRCM_FET_BRCMTEST, brcmtest);
+ if (!err)
+ err = err2;
+
+ return err;
+}
+
static int bcm54xx_phy_probe(struct phy_device *phydev)
{
struct bcm54xx_phy_priv *priv;
@@ -1033,6 +1068,8 @@ static struct phy_driver broadcom_drivers[] = {
.config_init = brcm_fet_config_init,
.config_intr = brcm_fet_config_intr,
.handle_interrupt = brcm_fet_handle_interrupt,
+ .suspend = brcm_fet_suspend,
+ .resume = brcm_fet_config_init,
}, {
.phy_id = PHY_ID_BCM5241,
.phy_id_mask = 0xfffffff0,
@@ -1041,6 +1078,8 @@ static struct phy_driver broadcom_drivers[] = {
.config_init = brcm_fet_config_init,
.config_intr = brcm_fet_config_intr,
.handle_interrupt = brcm_fet_handle_interrupt,
+ .suspend = brcm_fet_suspend,
+ .resume = brcm_fet_config_init,
}, {
.phy_id = PHY_ID_BCM5395,
.phy_id_mask = 0xfffffff0,
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 2a8195c50d14..ec91e671f8aa 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -10,6 +10,7 @@
#include <linux/mdio.h>
#include <linux/mii.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/phy.h>
#include <linux/hwmon.h>
#include <linux/bitfield.h>
@@ -34,6 +35,11 @@
#define MII_CFG1 18
#define MII_CFG1_MASTER_SLAVE BIT(15)
#define MII_CFG1_AUTO_OP BIT(14)
+#define MII_CFG1_INTERFACE_MODE_MASK GENMASK(9, 8)
+#define MII_CFG1_MII_MODE (0x0 << 8)
+#define MII_CFG1_RMII_MODE_REFCLK_IN BIT(8)
+#define MII_CFG1_RMII_MODE_REFCLK_OUT BIT(9)
+#define MII_CFG1_REVMII_MODE GENMASK(9, 8)
#define MII_CFG1_SLEEP_CONFIRM BIT(6)
#define MII_CFG1_LED_MODE_MASK GENMASK(5, 4)
#define MII_CFG1_LED_MODE_LINKUP 0
@@ -72,11 +78,15 @@
#define MII_COMMCFG 27
#define MII_COMMCFG_AUTO_OP BIT(15)
+/* Configure REF_CLK as input in RMII mode */
+#define TJA110X_RMII_MODE_REFCLK_IN BIT(0)
+
struct tja11xx_priv {
char *hwmon_name;
struct device *hwmon_dev;
struct phy_device *phydev;
struct work_struct phy_register_work;
+ u32 flags;
};
struct tja11xx_phy_stats {
@@ -251,8 +261,34 @@ do_test:
return __genphy_config_aneg(phydev, changed);
}
+static int tja11xx_get_interface_mode(struct phy_device *phydev)
+{
+ struct tja11xx_priv *priv = phydev->priv;
+ int mii_mode;
+
+ switch (phydev->interface) {
+ case PHY_INTERFACE_MODE_MII:
+ mii_mode = MII_CFG1_MII_MODE;
+ break;
+ case PHY_INTERFACE_MODE_REVMII:
+ mii_mode = MII_CFG1_REVMII_MODE;
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ if (priv->flags & TJA110X_RMII_MODE_REFCLK_IN)
+ mii_mode = MII_CFG1_RMII_MODE_REFCLK_IN;
+ else
+ mii_mode = MII_CFG1_RMII_MODE_REFCLK_OUT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return mii_mode;
+}
+
static int tja11xx_config_init(struct phy_device *phydev)
{
+ u16 reg_mask, reg_val;
int ret;
ret = tja11xx_enable_reg_write(phydev);
@@ -265,15 +301,32 @@ static int tja11xx_config_init(struct phy_device *phydev)
switch (phydev->phy_id & PHY_ID_MASK) {
case PHY_ID_TJA1100:
- ret = phy_modify(phydev, MII_CFG1,
- MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
- MII_CFG1_LED_ENABLE,
- MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
- MII_CFG1_LED_ENABLE);
+ reg_mask = MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
+ MII_CFG1_LED_ENABLE;
+ reg_val = MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
+ MII_CFG1_LED_ENABLE;
+
+ reg_mask |= MII_CFG1_INTERFACE_MODE_MASK;
+ ret = tja11xx_get_interface_mode(phydev);
+ if (ret < 0)
+ return ret;
+
+ reg_val |= (ret & 0xffff);
+ ret = phy_modify(phydev, MII_CFG1, reg_mask, reg_val);
if (ret)
return ret;
break;
case PHY_ID_TJA1101:
+ reg_mask = MII_CFG1_INTERFACE_MODE_MASK;
+ ret = tja11xx_get_interface_mode(phydev);
+ if (ret < 0)
+ return ret;
+
+ reg_val = ret & 0xffff;
+ ret = phy_modify(phydev, MII_CFG1, reg_mask, reg_val);
+ if (ret)
+ return ret;
+ fallthrough;
case PHY_ID_TJA1102:
ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
if (ret)
@@ -458,16 +511,36 @@ static int tja11xx_hwmon_register(struct phy_device *phydev,
return PTR_ERR_OR_ZERO(priv->hwmon_dev);
}
+static int tja11xx_parse_dt(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct tja11xx_priv *priv = phydev->priv;
+
+ if (!IS_ENABLED(CONFIG_OF_MDIO))
+ return 0;
+
+ if (of_property_read_bool(node, "nxp,rmii-refclk-in"))
+ priv->flags |= TJA110X_RMII_MODE_REFCLK_IN;
+
+ return 0;
+}
+
static int tja11xx_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
struct tja11xx_priv *priv;
+ int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->phydev = phydev;
+ phydev->priv = priv;
+
+ ret = tja11xx_parse_dt(phydev);
+ if (ret)
+ return ret;
return tja11xx_hwmon_register(phydev, priv);
}
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 1f2531a1a876..f8ec12d3d6ae 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -74,6 +74,58 @@ const char *phy_duplex_to_str(unsigned int duplex)
}
EXPORT_SYMBOL_GPL(phy_duplex_to_str);
+/**
+ * phy_interface_num_ports - Return the number of links that can be carried by
+ * a given MAC-PHY physical link. Returns the number
+ * of links, or 0 if this is unknown.
+ *
+ * @interface: The interface mode for which to get the number of ports
+ */
+int phy_interface_num_ports(phy_interface_t interface)
+{
+ switch (interface) {
+ case PHY_INTERFACE_MODE_NA:
+ return 0;
+ case PHY_INTERFACE_MODE_INTERNAL:
+ case PHY_INTERFACE_MODE_MII:
+ case PHY_INTERFACE_MODE_GMII:
+ case PHY_INTERFACE_MODE_TBI:
+ case PHY_INTERFACE_MODE_REVMII:
+ case PHY_INTERFACE_MODE_RMII:
+ case PHY_INTERFACE_MODE_REVRMII:
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ case PHY_INTERFACE_MODE_RTBI:
+ case PHY_INTERFACE_MODE_XGMII:
+ case PHY_INTERFACE_MODE_XLGMII:
+ case PHY_INTERFACE_MODE_MOCA:
+ case PHY_INTERFACE_MODE_TRGMII:
+ case PHY_INTERFACE_MODE_USXGMII:
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_SMII:
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ case PHY_INTERFACE_MODE_5GBASER:
+ case PHY_INTERFACE_MODE_10GBASER:
+ case PHY_INTERFACE_MODE_25GBASER:
+ case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_100BASEX:
+ case PHY_INTERFACE_MODE_RXAUI:
+ case PHY_INTERFACE_MODE_XAUI:
+ return 1;
+ case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_QUSGMII:
+ return 4;
+ case PHY_INTERFACE_MODE_MAX:
+ WARN_ONCE(1, "PHY_INTERFACE_MODE_MAX isn't a valid interface mode");
+ return 0;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(phy_interface_num_ports);
+
/* A mapping of all SUPPORTED settings to speed/duplex. This table
* must be grouped by speed and sorted in descending match priority
* - iow, descending speed.
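
As a usage note (hypothetical caller, not part of this series): a MAC driver that
only supports single-port interface modes could use the new helper to reject
QSGMII/QUSGMII early, since phy_interface_num_ports() returns 4 for those modes,
1 for single-port modes and 0 when the count is unknown.

    #include <linux/device.h>
    #include <linux/phy.h>

    /* Sketch only: my_mac_check_interface() is not an existing function. */
    static int my_mac_check_interface(struct device *dev, phy_interface_t interface)
    {
            if (phy_interface_num_ports(interface) != 1) {
                    dev_err(dev, "unsupported multi-port phy-mode %s\n",
                            phy_modes(interface));
                    return -EINVAL;
            }

            return 0;
    }
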
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 9bd69328dc4d..d2455df1d8d2 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -321,6 +321,7 @@ void phylink_get_linkmodes(unsigned long *linkmodes, phy_interface_t interface,
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_QUSGMII:
case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_GMII:
caps |= MAC_1000HD | MAC_1000FD;
@@ -632,6 +633,7 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
switch (pl->link_config.interface) {
case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_QUSGMII:
phylink_set(pl->supported, 10baseT_Half);
phylink_set(pl->supported, 10baseT_Full);
phylink_set(pl->supported, 100baseT_Half);
@@ -2929,6 +2931,7 @@ void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_QUSGMII:
phylink_decode_sgmii_word(state, lpa);
break;
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index a5671ab896b3..3d99fd6664d7 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -70,6 +70,7 @@
#define RTLGEN_SPEED_MASK 0x0630
#define RTL_GENERIC_PHYID 0x001cc800
+#define RTL_8211FVD_PHYID 0x001cc878
MODULE_DESCRIPTION("Realtek PHY driver");
MODULE_AUTHOR("Johnson Leung");
@@ -78,6 +79,7 @@ MODULE_LICENSE("GPL");
struct rtl821x_priv {
u16 phycr1;
u16 phycr2;
+ bool has_phycr2;
};
static int rtl821x_read_page(struct phy_device *phydev)
@@ -94,6 +96,7 @@ static int rtl821x_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
struct rtl821x_priv *priv;
+ u32 phy_id = phydev->drv->phy_id;
int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -108,13 +111,16 @@ static int rtl821x_probe(struct phy_device *phydev)
if (of_property_read_bool(dev->of_node, "realtek,aldps-enable"))
priv->phycr1 |= RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF;
- ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2);
- if (ret < 0)
- return ret;
+ priv->has_phycr2 = !(phy_id == RTL_8211FVD_PHYID);
+ if (priv->has_phycr2) {
+ ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2);
+ if (ret < 0)
+ return ret;
- priv->phycr2 = ret & RTL8211F_CLKOUT_EN;
- if (of_property_read_bool(dev->of_node, "realtek,clkout-disable"))
- priv->phycr2 &= ~RTL8211F_CLKOUT_EN;
+ priv->phycr2 = ret & RTL8211F_CLKOUT_EN;
+ if (of_property_read_bool(dev->of_node, "realtek,clkout-disable"))
+ priv->phycr2 &= ~RTL8211F_CLKOUT_EN;
+ }
phydev->priv = priv;
@@ -400,12 +406,14 @@ static int rtl8211f_config_init(struct phy_device *phydev)
val_rxdly ? "enabled" : "disabled");
}
- ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2,
- RTL8211F_CLKOUT_EN, priv->phycr2);
- if (ret < 0) {
- dev_err(dev, "clkout configuration failed: %pe\n",
- ERR_PTR(ret));
- return ret;
+ if (priv->has_phycr2) {
+ ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2,
+ RTL8211F_CLKOUT_EN, priv->phycr2);
+ if (ret < 0) {
+ dev_err(dev, "clkout configuration failed: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
return genphy_soft_reset(phydev);
@@ -924,6 +932,18 @@ static struct phy_driver realtek_drvs[] = {
.read_page = rtl821x_read_page,
.write_page = rtl821x_write_page,
}, {
+ PHY_ID_MATCH_EXACT(RTL_8211FVD_PHYID),
+ .name = "RTL8211F-VD Gigabit Ethernet",
+ .probe = rtl821x_probe,
+ .config_init = &rtl8211f_config_init,
+ .read_status = rtlgen_read_status,
+ .config_intr = &rtl8211f_config_intr,
+ .handle_interrupt = rtl8211f_handle_interrupt,
+ .suspend = genphy_suspend,
+ .resume = rtl821x_resume,
+ .read_page = rtl821x_read_page,
+ .write_page = rtl821x_write_page,
+ }, {
.name = "Generic FE-GE Realtek PHY",
.match_phy_device = rtlgen_match_phy_device,
.read_status = rtlgen_read_status,
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 63f90fe9a4d2..a12f7b599da2 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1195,90 +1195,45 @@ static const struct hwmon_ops sfp_hwmon_ops = {
.read_string = sfp_hwmon_read_string,
};
-static u32 sfp_hwmon_chip_config[] = {
- HWMON_C_REGISTER_TZ,
- 0,
-};
-
-static const struct hwmon_channel_info sfp_hwmon_chip = {
- .type = hwmon_chip,
- .config = sfp_hwmon_chip_config,
-};
-
-static u32 sfp_hwmon_temp_config[] = {
- HWMON_T_INPUT |
- HWMON_T_MAX | HWMON_T_MIN |
- HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
- HWMON_T_CRIT | HWMON_T_LCRIT |
- HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM |
- HWMON_T_LABEL,
- 0,
-};
-
-static const struct hwmon_channel_info sfp_hwmon_temp_channel_info = {
- .type = hwmon_temp,
- .config = sfp_hwmon_temp_config,
-};
-
-static u32 sfp_hwmon_vcc_config[] = {
- HWMON_I_INPUT |
- HWMON_I_MAX | HWMON_I_MIN |
- HWMON_I_MAX_ALARM | HWMON_I_MIN_ALARM |
- HWMON_I_CRIT | HWMON_I_LCRIT |
- HWMON_I_CRIT_ALARM | HWMON_I_LCRIT_ALARM |
- HWMON_I_LABEL,
- 0,
-};
-
-static const struct hwmon_channel_info sfp_hwmon_vcc_channel_info = {
- .type = hwmon_in,
- .config = sfp_hwmon_vcc_config,
-};
-
-static u32 sfp_hwmon_bias_config[] = {
- HWMON_C_INPUT |
- HWMON_C_MAX | HWMON_C_MIN |
- HWMON_C_MAX_ALARM | HWMON_C_MIN_ALARM |
- HWMON_C_CRIT | HWMON_C_LCRIT |
- HWMON_C_CRIT_ALARM | HWMON_C_LCRIT_ALARM |
- HWMON_C_LABEL,
- 0,
-};
-
-static const struct hwmon_channel_info sfp_hwmon_bias_channel_info = {
- .type = hwmon_curr,
- .config = sfp_hwmon_bias_config,
-};
-
-static u32 sfp_hwmon_power_config[] = {
- /* Transmit power */
- HWMON_P_INPUT |
- HWMON_P_MAX | HWMON_P_MIN |
- HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
- HWMON_P_CRIT | HWMON_P_LCRIT |
- HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM |
- HWMON_P_LABEL,
- /* Receive power */
- HWMON_P_INPUT |
- HWMON_P_MAX | HWMON_P_MIN |
- HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
- HWMON_P_CRIT | HWMON_P_LCRIT |
- HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM |
- HWMON_P_LABEL,
- 0,
-};
-
-static const struct hwmon_channel_info sfp_hwmon_power_channel_info = {
- .type = hwmon_power,
- .config = sfp_hwmon_power_config,
-};
-
static const struct hwmon_channel_info *sfp_hwmon_info[] = {
- &sfp_hwmon_chip,
- &sfp_hwmon_vcc_channel_info,
- &sfp_hwmon_temp_channel_info,
- &sfp_hwmon_bias_channel_info,
- &sfp_hwmon_power_channel_info,
+ HWMON_CHANNEL_INFO(chip,
+ HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(in,
+ HWMON_I_INPUT |
+ HWMON_I_MAX | HWMON_I_MIN |
+ HWMON_I_MAX_ALARM | HWMON_I_MIN_ALARM |
+ HWMON_I_CRIT | HWMON_I_LCRIT |
+ HWMON_I_CRIT_ALARM | HWMON_I_LCRIT_ALARM |
+ HWMON_I_LABEL),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT |
+ HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
+ HWMON_T_CRIT | HWMON_T_LCRIT |
+ HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM |
+ HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(curr,
+ HWMON_C_INPUT |
+ HWMON_C_MAX | HWMON_C_MIN |
+ HWMON_C_MAX_ALARM | HWMON_C_MIN_ALARM |
+ HWMON_C_CRIT | HWMON_C_LCRIT |
+ HWMON_C_CRIT_ALARM | HWMON_C_LCRIT_ALARM |
+ HWMON_C_LABEL),
+ HWMON_CHANNEL_INFO(power,
+ /* Transmit power */
+ HWMON_P_INPUT |
+ HWMON_P_MAX | HWMON_P_MIN |
+ HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+ HWMON_P_CRIT | HWMON_P_LCRIT |
+ HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM |
+ HWMON_P_LABEL,
+ /* Receive power */
+ HWMON_P_INPUT |
+ HWMON_P_MAX | HWMON_P_MIN |
+ HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+ HWMON_P_CRIT | HWMON_P_LCRIT |
+ HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM |
+ HWMON_P_LABEL),
NULL,
};
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index c3285242f74f..1a47d04f5d1a 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -713,12 +713,9 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
off_vx = skb_gro_offset(skb);
hlen = off_vx + sizeof(*vh);
- vh = skb_gro_header_fast(skb, off_vx);
- if (skb_gro_header_hard(skb, hlen)) {
- vh = skb_gro_header_slow(skb, hlen, off_vx);
- if (unlikely(!vh))
- goto out;
- }
+ vh = skb_gro_header(skb, hlen, off_vx);
+ if (unlikely(!vh))
+ goto out;
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 7fa960bd3df1..42da760e0f45 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -578,6 +578,7 @@ int of_device_compatible_match(struct device_node *device,
return score;
}
+EXPORT_SYMBOL_GPL(of_device_compatible_match);
/**
* of_machine_is_compatible - Test root of device tree for a given compatible value
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 20c26aed7896..a627a02cf8ab 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2349,6 +2349,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
extern const struct bpf_func_proto bpf_tail_call_proto;
extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
+extern const struct bpf_func_proto bpf_ktime_get_tai_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 6ff567ece34a..9e77165f3ef6 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -293,6 +293,7 @@
#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */
#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */
+#define MII_BRCM_FET_SHDW_AM4_STANDBY 0x0008 /* Standby enable */
#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003
#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001
diff --git a/include/linux/btf.h b/include/linux/btf.h
index cdb376d53238..ad93c2d9cc1c 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -49,6 +49,8 @@
* for this case.
*/
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
+#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
+#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
struct btf;
struct btf_member;
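
The two new kfunc flags are consumed at registration time. A minimal sketch of how a kfunc might be tagged, assuming the BTF_SET8/BTF_ID_FLAGS registration macros from <linux/btf_ids.h>; bpf_example_reboot() and the chosen program type are made up, and a real kfunc definition carries additional annotations not shown here:

#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/module.h>

/* made-up kfunc body; marked sleepable and destructive below */
noinline int bpf_example_reboot(void)
{
	return 0;
}

BTF_SET8_START(example_kfunc_ids)
BTF_ID_FLAGS(func, bpf_example_reboot, KF_SLEEPABLE | KF_DESTRUCTIVE)
BTF_SET8_END(example_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &example_kfunc_ids,
};

/* from module/subsystem init, e.g.:
 * register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &example_kfunc_set);
 */
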
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 445e80517cab..fc93c9488c76 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -371,4 +371,11 @@
*/
#define __weak __attribute__((__weak__))
+/*
+ * Used by functions that use '__builtin_return_address'. These functions
+ * must not be split or inlined, since either can cause
+ * '__builtin_return_address' to return an unexpected address.
+ */
+#define __fix_address noinline __noclone
+
#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
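
For context, a hedged sketch of the kind of function the new attribute is intended for; the function name is hypothetical:

/* Without __fix_address the compiler may clone or partially inline this
 * helper, in which case __builtin_return_address(0) would no longer point
 * into the immediate caller.
 */
static __fix_address void *example_caller_address(void)
{
	return __builtin_return_address(0);
}
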
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 05d6f3facd5a..eec35f9b6616 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -546,8 +546,8 @@ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
{
unsigned long val, new;
+ val = READ_ONCE(n->state);
do {
- val = READ_ONCE(n->state);
if (val & NAPIF_STATE_DISABLE)
return true;
@@ -555,7 +555,7 @@ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
return false;
new = val | NAPIF_STATE_MISSED;
- } while (cmpxchg(&n->state, val, new) != val);
+ } while (!try_cmpxchg(&n->state, &val, new));
return true;
}
@@ -1851,7 +1851,6 @@ enum netdev_ml_priv_type {
* @tipc_ptr: TIPC specific data
* @atalk_ptr: AppleTalk link
* @ip_ptr: IPv4 specific data
- * @dn_ptr: DECnet specific data
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
@@ -2147,9 +2146,6 @@ struct net_device {
#if IS_ENABLED(CONFIG_ATALK)
void *atalk_ptr;
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- struct dn_dev __rcu *dn_ptr;
-#endif
#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
#endif
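
On the napi_if_scheduled_mark_missed() change earlier in this file: try_cmpxchg() writes the observed value back into its second argument on failure, which is why the READ_ONCE() moves out of the retry loop. A generic sketch of the pattern, not tied to the NAPI code:

#include <linux/atomic.h>

static void example_set_flag(unsigned long *state, unsigned long flag)
{
	unsigned long old, new;

	old = READ_ONCE(*state);
	do {
		new = old | flag;
		/* on failure, try_cmpxchg() refreshes 'old' for the retry */
	} while (!try_cmpxchg(state, &old, new));
}
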
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index c2c6f332fb90..d8817d381c14 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -243,11 +243,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
#endif
break;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
- break;
-#endif
default:
WARN_ON_ONCE(1);
break;
diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h
index 8dddfb151f00..a5f7bef1b3a4 100644
--- a/include/linux/netfilter_defs.h
+++ b/include/linux/netfilter_defs.h
@@ -7,14 +7,6 @@
/* in/out/forward only */
#define NF_ARP_NUMHOOKS 3
-/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */
-#define NF_DN_NUMHOOKS 7
-
-#if IS_ENABLED(CONFIG_DECNET)
-/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
-#define NF_MAX_HOOKS NF_DN_NUMHOOKS
-#else
#define NF_MAX_HOOKS NF_INET_NUMHOOKS
-#endif
#endif
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 87638c55d844..7c49ab95441b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -115,6 +115,7 @@ extern const int phy_10gbit_features_array[1];
* @PHY_INTERFACE_MODE_25GBASER: 25G BaseR
* @PHY_INTERFACE_MODE_USXGMII: Universal Serial 10GE MII
* @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN
+ * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII
* @PHY_INTERFACE_MODE_MAX: Book keeping
*
* Describes the interface between the MAC and PHY.
@@ -152,6 +153,7 @@ typedef enum {
PHY_INTERFACE_MODE_USXGMII,
/* 10GBASE-KR - with Clause 73 AN */
PHY_INTERFACE_MODE_10GKR,
+ PHY_INTERFACE_MODE_QUSGMII,
PHY_INTERFACE_MODE_MAX,
} phy_interface_t;
@@ -267,6 +269,8 @@ static inline const char *phy_modes(phy_interface_t interface)
return "10gbase-kr";
case PHY_INTERFACE_MODE_100BASEX:
return "100base-x";
+ case PHY_INTERFACE_MODE_QUSGMII:
+ return "qusgmii";
default:
return "unknown";
}
@@ -964,6 +968,8 @@ struct phy_fixup {
const char *phy_speed_to_str(int speed);
const char *phy_duplex_to_str(unsigned int duplex);
+int phy_interface_num_ports(phy_interface_t interface);
+
/* A structure for mapping a particular speed and duplex
* combination to a particular SUPPORTED and ADVERTISED value
*/
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8afa382bf2..b51d07a727c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1195,7 +1195,8 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
/**
* kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 8df475db88c0..fb2e88614f5d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -257,7 +257,6 @@ struct plat_stmmacenet_data {
u8 vlan_fail_q;
unsigned int eee_usecs_rate;
struct pci_dev *pdev;
- bool has_crossts;
int int_snapshot_num;
int ext_snapshot_num;
bool int_snapshot_en;
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..568a87c5e0d0 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -78,6 +78,7 @@ struct vsock_sock {
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
+void vsock_data_ready(struct sock *sk);
/**** TRANSPORT ****/
@@ -135,6 +136,7 @@ struct vsock_transport {
u64 (*stream_rcvhiwat)(struct vsock_sock *);
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
+ int (*set_rcvlowat)(struct vsock_sock *vsk, int val);
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 119ed1ffb988..1f7026011856 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -624,8 +624,7 @@ struct devlink_flash_update_params {
u32 overwrite_mask;
};
-#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT BIT(0)
-#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(1)
+#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(0)
struct devlink_region;
struct devlink_info_req;
@@ -1714,15 +1713,31 @@ int devlink_info_driver_name_put(struct devlink_info_req *req,
const char *name);
int devlink_info_board_serial_number_put(struct devlink_info_req *req,
const char *bsn);
+
+enum devlink_info_version_type {
+ DEVLINK_INFO_VERSION_TYPE_NONE,
+ DEVLINK_INFO_VERSION_TYPE_COMPONENT, /* May be used as flash update
+ * component by name.
+ */
+};
+
int devlink_info_version_fixed_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
int devlink_info_version_stored_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type);
int devlink_info_version_running_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type);
int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
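
A minimal sketch of a driver ->info_get() callback using the new _ext variant; the driver name, version name and version string are illustrative, only the helper signatures come from the declarations above:

#include <net/devlink.h>

static int example_devlink_info_get(struct devlink *devlink,
				    struct devlink_info_req *req,
				    struct netlink_ext_ack *extack)
{
	int err;

	err = devlink_info_driver_name_put(req, "example");
	if (err)
		return err;

	/* report fw.mgmt and mark it as a flashable component by name */
	return devlink_info_version_running_put_ext(req, "fw.mgmt", "1.2.3",
						    DEVLINK_INFO_VERSION_TYPE_COMPONENT);
}
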
diff --git a/include/net/dn.h b/include/net/dn.h
deleted file mode 100644
index ba9655b0098a..000000000000
--- a/include/net/dn.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_H
-#define _NET_DN_H
-
-#include <linux/dn.h>
-#include <net/sock.h>
-#include <net/flow.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-struct dn_scp /* Session Control Port */
-{
- unsigned char state;
-#define DN_O 1 /* Open */
-#define DN_CR 2 /* Connect Receive */
-#define DN_DR 3 /* Disconnect Reject */
-#define DN_DRC 4 /* Discon. Rej. Complete*/
-#define DN_CC 5 /* Connect Confirm */
-#define DN_CI 6 /* Connect Initiate */
-#define DN_NR 7 /* No resources */
-#define DN_NC 8 /* No communication */
-#define DN_CD 9 /* Connect Delivery */
-#define DN_RJ 10 /* Rejected */
-#define DN_RUN 11 /* Running */
-#define DN_DI 12 /* Disconnect Initiate */
-#define DN_DIC 13 /* Disconnect Complete */
-#define DN_DN 14 /* Disconnect Notificat */
-#define DN_CL 15 /* Closed */
-#define DN_CN 16 /* Closed Notification */
-
- __le16 addrloc;
- __le16 addrrem;
- __u16 numdat;
- __u16 numoth;
- __u16 numoth_rcv;
- __u16 numdat_rcv;
- __u16 ackxmt_dat;
- __u16 ackxmt_oth;
- __u16 ackrcv_dat;
- __u16 ackrcv_oth;
- __u8 flowrem_sw;
- __u8 flowloc_sw;
-#define DN_SEND 2
-#define DN_DONTSEND 1
-#define DN_NOCHANGE 0
- __u16 flowrem_dat;
- __u16 flowrem_oth;
- __u16 flowloc_dat;
- __u16 flowloc_oth;
- __u8 services_rem;
- __u8 services_loc;
- __u8 info_rem;
- __u8 info_loc;
-
- __u16 segsize_rem;
- __u16 segsize_loc;
-
- __u8 nonagle;
- __u8 multi_ireq;
- __u8 accept_mode;
- unsigned long seg_total; /* Running total of current segment */
-
- struct optdata_dn conndata_in;
- struct optdata_dn conndata_out;
- struct optdata_dn discdata_in;
- struct optdata_dn discdata_out;
- struct accessdata_dn accessdata;
-
- struct sockaddr_dn addr; /* Local address */
- struct sockaddr_dn peer; /* Remote address */
-
- /*
- * In this case the RTT estimation is not specified in the
- * docs, nor is any back off algorithm. Here we follow well
- * known tcp algorithms with a few small variations.
- *
- * snd_window: Max number of packets we send before we wait for
- * an ack to come back. This will become part of a
- * more complicated scheme when we support flow
- * control.
- *
- * nsp_srtt: Round-Trip-Time (x8) in jiffies. This is a rolling
- * average.
- * nsp_rttvar: Round-Trip-Time-Varience (x4) in jiffies. This is the
- * varience of the smoothed average (but calculated in
- * a simpler way than for normal statistical varience
- * calculations).
- *
- * nsp_rxtshift: Backoff counter. Value is zero normally, each time
- * a packet is lost is increases by one until an ack
- * is received. Its used to index an array of backoff
- * multipliers.
- */
-#define NSP_MIN_WINDOW 1
-#define NSP_MAX_WINDOW (0x07fe)
- unsigned long max_window;
- unsigned long snd_window;
-#define NSP_INITIAL_SRTT (HZ)
- unsigned long nsp_srtt;
-#define NSP_INITIAL_RTTVAR (HZ*3)
- unsigned long nsp_rttvar;
-#define NSP_MAXRXTSHIFT 12
- unsigned long nsp_rxtshift;
-
- /*
- * Output queues, one for data, one for otherdata/linkservice
- */
- struct sk_buff_head data_xmit_queue;
- struct sk_buff_head other_xmit_queue;
-
- /*
- * Input queue for other data
- */
- struct sk_buff_head other_receive_queue;
- int other_report;
-
- /*
- * Stuff to do with the slow timer
- */
- unsigned long stamp; /* time of last transmit */
- unsigned long persist;
- int (*persist_fxn)(struct sock *sk);
- unsigned long keepalive;
- void (*keepalive_fxn)(struct sock *sk);
-
-};
-
-static inline struct dn_scp *DN_SK(struct sock *sk)
-{
- return (struct dn_scp *)(sk + 1);
-}
-
-/*
- * src,dst : Source and Destination DECnet addresses
- * hops : Number of hops through the network
- * dst_port, src_port : NSP port numbers
- * services, info : Useful data extracted from conninit messages
- * rt_flags : Routing flags byte
- * nsp_flags : NSP layer flags byte
- * segsize : Size of segment
- * segnum : Number, for data, otherdata and linkservice
- * xmit_count : Number of times we've transmitted this skb
- * stamp : Time stamp of most recent transmission, used in RTT calculations
- * iif: Input interface number
- *
- * As a general policy, this structure keeps all addresses in network
- * byte order, and all else in host byte order. Thus dst, src, dst_port
- * and src_port are in network order. All else is in host order.
- *
- */
-#define DN_SKB_CB(skb) ((struct dn_skb_cb *)(skb)->cb)
-struct dn_skb_cb {
- __le16 dst;
- __le16 src;
- __u16 hops;
- __le16 dst_port;
- __le16 src_port;
- __u8 services;
- __u8 info;
- __u8 rt_flags;
- __u8 nsp_flags;
- __u16 segsize;
- __u16 segnum;
- __u16 xmit_count;
- unsigned long stamp;
- int iif;
-};
-
-static inline __le16 dn_eth2dn(const unsigned char *ethaddr)
-{
- return get_unaligned((__le16 *)(ethaddr + 4));
-}
-
-static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr)
-{
- return *(__le16 *)saddr->sdn_nodeaddr;
-}
-
-static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr)
-{
- __u16 a = le16_to_cpu(addr);
- ethaddr[0] = 0xAA;
- ethaddr[1] = 0x00;
- ethaddr[2] = 0x04;
- ethaddr[3] = 0x00;
- ethaddr[4] = (__u8)(a & 0xff);
- ethaddr[5] = (__u8)(a >> 8);
-}
-
-static inline void dn_sk_ports_copy(struct flowidn *fld, struct dn_scp *scp)
-{
- fld->fld_sport = scp->addrloc;
- fld->fld_dport = scp->addrrem;
-}
-
-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu);
-void dn_register_sysctl(void);
-void dn_unregister_sysctl(void);
-
-#define DN_MENUVER_ACC 0x01
-#define DN_MENUVER_USR 0x02
-#define DN_MENUVER_PRX 0x04
-#define DN_MENUVER_UIC 0x08
-
-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr);
-struct sock *dn_find_by_skb(struct sk_buff *skb);
-#define DN_ASCBUF_LEN 9
-char *dn_addr2asc(__u16, char *);
-int dn_destroy_timer(struct sock *sk);
-
-int dn_sockaddr2username(struct sockaddr_dn *addr, unsigned char *buf,
- unsigned char type);
-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *addr,
- unsigned char *type);
-
-void dn_start_slow_timer(struct sock *sk);
-void dn_stop_slow_timer(struct sock *sk);
-
-extern __le16 decnet_address;
-extern int decnet_debug_level;
-extern int decnet_time_wait;
-extern int decnet_dn_count;
-extern int decnet_di_count;
-extern int decnet_dr_count;
-extern int decnet_no_fc_max_cwnd;
-
-extern long sysctl_decnet_mem[3];
-extern int sysctl_decnet_wmem[3];
-extern int sysctl_decnet_rmem[3];
-
-#endif /* _NET_DN_H */
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
deleted file mode 100644
index bec303ea8367..000000000000
--- a/include/net/dn_dev.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_DEV_H
-#define _NET_DN_DEV_H
-
-#include <linux/netdevice.h>
-
-struct dn_dev;
-
-struct dn_ifaddr {
- struct dn_ifaddr __rcu *ifa_next;
- struct dn_dev *ifa_dev;
- __le16 ifa_local;
- __le16 ifa_address;
- __u32 ifa_flags;
- __u8 ifa_scope;
- char ifa_label[IFNAMSIZ];
- struct rcu_head rcu;
-};
-
-#define DN_DEV_S_RU 0 /* Run - working normally */
-#define DN_DEV_S_CR 1 /* Circuit Rejected */
-#define DN_DEV_S_DS 2 /* Data Link Start */
-#define DN_DEV_S_RI 3 /* Routing Layer Initialize */
-#define DN_DEV_S_RV 4 /* Routing Layer Verify */
-#define DN_DEV_S_RC 5 /* Routing Layer Complete */
-#define DN_DEV_S_OF 6 /* Off */
-#define DN_DEV_S_HA 7 /* Halt */
-
-
-/*
- * The dn_dev_parms structure contains the set of parameters
- * for each device (hence inclusion in the dn_dev structure)
- * and an array is used to store the default types of supported
- * device (in dn_dev.c).
- *
- * The type field matches the ARPHRD_ constants and is used in
- * searching the list for supported devices when new devices
- * come up.
- *
- * The mode field is used to find out if a device is broadcast,
- * multipoint, or pointopoint. Please note that DECnet thinks
- * different ways about devices to the rest of the kernel
- * so the normal IFF_xxx flags are invalid here. For devices
- * which can be any combination of the previously mentioned
- * attributes, you can set this on a per device basis by
- * installing an up() routine.
- *
- * The device state field, defines the initial state in which the
- * device will come up. In the dn_dev structure, it is the actual
- * state.
- *
- * Things have changed here. I've killed timer1 since it's a user space
- * issue for a user space routing deamon to sort out. The kernel does
- * not need to be bothered with it.
- *
- * Timers:
- * t2 - Rate limit timer, min time between routing and hello messages
- * t3 - Hello timer, send hello messages when it expires
- *
- * Callbacks:
- * up() - Called to initialize device, return value can veto use of
- * device with DECnet.
- * down() - Called to turn device off when it goes down
- * timer3() - Called once for each ifaddr when timer 3 goes off
- *
- * sysctl - Hook for sysctl things
- *
- */
-struct dn_dev_parms {
- int type; /* ARPHRD_xxx */
- int mode; /* Broadcast, Unicast, Mulitpoint */
-#define DN_DEV_BCAST 1
-#define DN_DEV_UCAST 2
-#define DN_DEV_MPOINT 4
- int state; /* Initial state */
- int forwarding; /* 0=EndNode, 1=L1Router, 2=L2Router */
- unsigned long t2; /* Default value of t2 */
- unsigned long t3; /* Default value of t3 */
- int priority; /* Priority to be a router */
- char *name; /* Name for sysctl */
- int (*up)(struct net_device *);
- void (*down)(struct net_device *);
- void (*timer3)(struct net_device *, struct dn_ifaddr *ifa);
- void *sysctl;
-};
-
-
-struct dn_dev {
- struct dn_ifaddr __rcu *ifa_list;
- struct net_device *dev;
- struct dn_dev_parms parms;
- char use_long;
- struct timer_list timer;
- unsigned long t3;
- struct neigh_parms *neigh_parms;
- __u8 addr[ETH_ALEN];
- struct neighbour *router; /* Default router on circuit */
- struct neighbour *peer; /* Peer on pointopoint links */
- unsigned long uptime; /* Time device went up in jiffies */
-};
-
-struct dn_short_packet {
- __u8 msgflg;
- __le16 dstnode;
- __le16 srcnode;
- __u8 forward;
-} __packed;
-
-struct dn_long_packet {
- __u8 msgflg;
- __u8 d_area;
- __u8 d_subarea;
- __u8 d_id[6];
- __u8 s_area;
- __u8 s_subarea;
- __u8 s_id[6];
- __u8 nl2;
- __u8 visit_ct;
- __u8 s_class;
- __u8 pt;
-} __packed;
-
-/*------------------------- DRP - Routing messages ---------------------*/
-
-struct endnode_hello_message {
- __u8 msgflg;
- __u8 tiver[3];
- __u8 id[6];
- __u8 iinfo;
- __le16 blksize;
- __u8 area;
- __u8 seed[8];
- __u8 neighbor[6];
- __le16 timer;
- __u8 mpd;
- __u8 datalen;
- __u8 data[2];
-} __packed;
-
-struct rtnode_hello_message {
- __u8 msgflg;
- __u8 tiver[3];
- __u8 id[6];
- __u8 iinfo;
- __le16 blksize;
- __u8 priority;
- __u8 area;
- __le16 timer;
- __u8 mpd;
-} __packed;
-
-
-void dn_dev_init(void);
-void dn_dev_cleanup(void);
-
-int dn_dev_ioctl(unsigned int cmd, void __user *arg);
-
-void dn_dev_devices_off(void);
-void dn_dev_devices_on(void);
-
-void dn_dev_init_pkt(struct sk_buff *skb);
-void dn_dev_veri_pkt(struct sk_buff *skb);
-void dn_dev_hello(struct sk_buff *skb);
-
-void dn_dev_up(struct net_device *);
-void dn_dev_down(struct net_device *);
-
-int dn_dev_set_default(struct net_device *dev, int force);
-struct net_device *dn_dev_get_default(void);
-int dn_dev_bind_default(__le16 *addr);
-
-int register_dnaddr_notifier(struct notifier_block *nb);
-int unregister_dnaddr_notifier(struct notifier_block *nb);
-
-static inline int dn_dev_islocal(struct net_device *dev, __le16 addr)
-{
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int res = 0;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL) {
- printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n");
- goto out;
- }
-
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa != NULL;
- ifa = rcu_dereference(ifa->ifa_next))
- if ((addr ^ ifa->ifa_local) == 0) {
- res = 1;
- break;
- }
-out:
- rcu_read_unlock();
- return res;
-}
-
-#endif /* _NET_DN_DEV_H */
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
deleted file mode 100644
index 1929a3cd5ebe..000000000000
--- a/include/net/dn_fib.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_FIB_H
-#define _NET_DN_FIB_H
-
-#include <linux/netlink.h>
-#include <linux/refcount.h>
-#include <linux/rtnetlink.h>
-#include <net/fib_rules.h>
-
-extern const struct nla_policy rtm_dn_policy[];
-
-struct dn_fib_res {
- struct fib_rule *r;
- struct dn_fib_info *fi;
- unsigned char prefixlen;
- unsigned char nh_sel;
- unsigned char type;
- unsigned char scope;
-};
-
-struct dn_fib_nh {
- struct net_device *nh_dev;
- unsigned int nh_flags;
- unsigned char nh_scope;
- int nh_weight;
- int nh_power;
- int nh_oif;
- __le16 nh_gw;
-};
-
-struct dn_fib_info {
- struct dn_fib_info *fib_next;
- struct dn_fib_info *fib_prev;
- refcount_t fib_treeref;
- refcount_t fib_clntref;
- int fib_dead;
- unsigned int fib_flags;
- int fib_protocol;
- __le16 fib_prefsrc;
- __u32 fib_priority;
- __u32 fib_metrics[RTAX_MAX];
- int fib_nhs;
- int fib_power;
- struct dn_fib_nh fib_nh[0];
-#define dn_fib_dev fib_nh[0].nh_dev
-};
-
-
-#define DN_FIB_RES_RESET(res) ((res).nh_sel = 0)
-#define DN_FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel])
-
-#define DN_FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : __dn_fib_res_prefsrc(&res))
-#define DN_FIB_RES_GW(res) (DN_FIB_RES_NH(res).nh_gw)
-#define DN_FIB_RES_DEV(res) (DN_FIB_RES_NH(res).nh_dev)
-#define DN_FIB_RES_OIF(res) (DN_FIB_RES_NH(res).nh_oif)
-
-typedef struct {
- __le16 datum;
-} dn_fib_key_t;
-
-typedef struct {
- __le16 datum;
-} dn_fib_hash_t;
-
-typedef struct {
- __u16 datum;
-} dn_fib_idx_t;
-
-struct dn_fib_node {
- struct dn_fib_node *fn_next;
- struct dn_fib_info *fn_info;
-#define DN_FIB_INFO(f) ((f)->fn_info)
- dn_fib_key_t fn_key;
- u8 fn_type;
- u8 fn_scope;
- u8 fn_state;
-};
-
-
-struct dn_fib_table {
- struct hlist_node hlist;
- u32 n;
-
- int (*insert)(struct dn_fib_table *t, struct rtmsg *r,
- struct nlattr *attrs[], struct nlmsghdr *n,
- struct netlink_skb_parms *req);
- int (*delete)(struct dn_fib_table *t, struct rtmsg *r,
- struct nlattr *attrs[], struct nlmsghdr *n,
- struct netlink_skb_parms *req);
- int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld,
- struct dn_fib_res *res);
- int (*flush)(struct dn_fib_table *t);
- int (*dump)(struct dn_fib_table *t, struct sk_buff *skb, struct netlink_callback *cb);
-
- unsigned char data[];
-};
-
-#ifdef CONFIG_DECNET_ROUTER
-/*
- * dn_fib.c
- */
-void dn_fib_init(void);
-void dn_fib_cleanup(void);
-
-int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r,
- struct nlattr *attrs[],
- const struct nlmsghdr *nlh, int *errp);
-int dn_fib_semantic_match(int type, struct dn_fib_info *fi,
- const struct flowidn *fld, struct dn_fib_res *res);
-void dn_fib_release_info(struct dn_fib_info *fi);
-void dn_fib_flush(void);
-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res);
-
-/*
- * dn_tables.c
- */
-struct dn_fib_table *dn_fib_get_table(u32 n, int creat);
-struct dn_fib_table *dn_fib_empty_table(void);
-void dn_fib_table_init(void);
-void dn_fib_table_cleanup(void);
-
-/*
- * dn_rules.c
- */
-void dn_fib_rules_init(void);
-void dn_fib_rules_cleanup(void);
-unsigned int dnet_addr_type(__le16 addr);
-int dn_fib_lookup(struct flowidn *fld, struct dn_fib_res *res);
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
-void dn_fib_free_info(struct dn_fib_info *fi);
-
-static inline void dn_fib_info_put(struct dn_fib_info *fi)
-{
- if (refcount_dec_and_test(&fi->fib_clntref))
- dn_fib_free_info(fi);
-}
-
-static inline void dn_fib_res_put(struct dn_fib_res *res)
-{
- if (res->fi)
- dn_fib_info_put(res->fi);
- if (res->r)
- fib_rule_put(res->r);
-}
-
-#else /* Endnode */
-
-#define dn_fib_init() do { } while(0)
-#define dn_fib_cleanup() do { } while(0)
-
-#define dn_fib_lookup(fl, res) (-ESRCH)
-#define dn_fib_info_put(fi) do { } while(0)
-#define dn_fib_select_multipath(fl, res) do { } while(0)
-#define dn_fib_rules_policy(saddr,res,flags) (0)
-#define dn_fib_res_put(res) do { } while(0)
-
-#endif /* CONFIG_DECNET_ROUTER */
-
-static inline __le16 dnet_make_mask(int n)
-{
- if (n)
- return cpu_to_le16(~((1 << (16 - n)) - 1));
- return cpu_to_le16(0);
-}
-
-#endif /* _NET_DN_FIB_H */
diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h
deleted file mode 100644
index 1f7df98bfc33..000000000000
--- a/include/net/dn_neigh.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_DN_NEIGH_H
-#define _NET_DN_NEIGH_H
-
-#include <net/neighbour.h>
-
-/*
- * The position of the first two fields of
- * this structure are critical - SJW
- */
-struct dn_neigh {
- struct neighbour n;
- __le16 addr;
- unsigned long flags;
-#define DN_NDFLAG_R1 0x0001 /* Router L1 */
-#define DN_NDFLAG_R2 0x0002 /* Router L2 */
-#define DN_NDFLAG_P3 0x0004 /* Phase III Node */
- unsigned long blksize;
- __u8 priority;
-};
-
-void dn_neigh_init(void);
-void dn_neigh_cleanup(void);
-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
-void dn_neigh_pointopoint_hello(struct sk_buff *skb);
-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n);
-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-
-extern struct neigh_table dn_neigh_table;
-
-#endif /* _NET_DN_NEIGH_H */
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
deleted file mode 100644
index a4a18fee0b7c..000000000000
--- a/include/net/dn_nsp.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_DN_NSP_H
-#define _NET_DN_NSP_H
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-/* dn_nsp.c functions prototyping */
-#include <linux/atomic.h>
-#include <linux/types.h>
-#include <net/sock.h>
-
-struct sk_buff;
-struct sk_buff_head;
-
-void dn_nsp_send_data_ack(struct sock *sk);
-void dn_nsp_send_oth_ack(struct sock *sk);
-void dn_send_conn_ack(struct sock *sk);
-void dn_send_conn_conf(struct sock *sk, gfp_t gfp);
-void dn_nsp_send_disc(struct sock *sk, unsigned char type,
- unsigned short reason, gfp_t gfp);
-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type,
- unsigned short reason);
-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval);
-void dn_nsp_send_conninit(struct sock *sk, unsigned char flags);
-
-void dn_nsp_output(struct sock *sk);
-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *q, unsigned short acknum);
-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp,
- int oob);
-unsigned long dn_nsp_persist(struct sock *sk);
-int dn_nsp_xmit_timeout(struct sock *sk);
-
-int dn_nsp_rx(struct sk_buff *);
-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
-
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
-struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock,
- long timeo, int *err);
-
-#define NSP_REASON_OK 0 /* No error */
-#define NSP_REASON_NR 1 /* No resources */
-#define NSP_REASON_UN 2 /* Unrecognised node name */
-#define NSP_REASON_SD 3 /* Node shutting down */
-#define NSP_REASON_ID 4 /* Invalid destination end user */
-#define NSP_REASON_ER 5 /* End user lacks resources */
-#define NSP_REASON_OB 6 /* Object too busy */
-#define NSP_REASON_US 7 /* Unspecified error */
-#define NSP_REASON_TP 8 /* Third-Party abort */
-#define NSP_REASON_EA 9 /* End user has aborted the link */
-#define NSP_REASON_IF 10 /* Invalid node name format */
-#define NSP_REASON_LS 11 /* Local node shutdown */
-#define NSP_REASON_LL 32 /* Node lacks logical-link resources */
-#define NSP_REASON_LE 33 /* End user lacks logical-link resources */
-#define NSP_REASON_UR 34 /* Unacceptable RQSTRID or PASSWORD field */
-#define NSP_REASON_UA 36 /* Unacceptable ACCOUNT field */
-#define NSP_REASON_TM 38 /* End user timed out logical link */
-#define NSP_REASON_NU 39 /* Node unreachable */
-#define NSP_REASON_NL 41 /* No-link message */
-#define NSP_REASON_DC 42 /* Disconnect confirm */
-#define NSP_REASON_IO 43 /* Image data field overflow */
-
-#define NSP_DISCINIT 0x38
-#define NSP_DISCCONF 0x48
-
-/*------------------------- NSP - messages ------------------------------*/
-/* Data Messages */
-/*---------------*/
-
-/* Data Messages (data segment/interrupt/link service) */
-
-struct nsp_data_seg_msg {
- __u8 msgflg;
- __le16 dstaddr;
- __le16 srcaddr;
-} __packed;
-
-struct nsp_data_opt_msg {
- __le16 acknum;
- __le16 segnum;
- __le16 lsflgs;
-} __packed;
-
-struct nsp_data_opt_msg1 {
- __le16 acknum;
- __le16 segnum;
-} __packed;
-
-
-/* Acknowledgment Message (data/other data) */
-struct nsp_data_ack_msg {
- __u8 msgflg;
- __le16 dstaddr;
- __le16 srcaddr;
- __le16 acknum;
-} __packed;
-
-/* Connect Acknowledgment Message */
-struct nsp_conn_ack_msg {
- __u8 msgflg;
- __le16 dstaddr;
-} __packed;
-
-
-/* Connect Initiate/Retransmit Initiate/Connect Confirm */
-struct nsp_conn_init_msg {
- __u8 msgflg;
-#define NSP_CI 0x18 /* Connect Initiate */
-#define NSP_RCI 0x68 /* Retrans. Conn Init */
- __le16 dstaddr;
- __le16 srcaddr;
- __u8 services;
-#define NSP_FC_NONE 0x00 /* Flow Control None */
-#define NSP_FC_SRC 0x04 /* Seg Req. Count */
-#define NSP_FC_SCMC 0x08 /* Sess. Control Mess */
-#define NSP_FC_MASK 0x0c /* FC type mask */
- __u8 info;
- __le16 segsize;
-} __packed;
-
-/* Disconnect Initiate/Disconnect Confirm */
-struct nsp_disconn_init_msg {
- __u8 msgflg;
- __le16 dstaddr;
- __le16 srcaddr;
- __le16 reason;
-} __packed;
-
-
-
-struct srcobj_fmt {
- __u8 format;
- __u8 task;
- __le16 grpcode;
- __le16 usrcode;
- __u8 dlen;
-} __packed;
-
-/*
- * A collection of functions for manipulating the sequence
- * numbers used in NSP. Similar in operation to the functions
- * of the same name in TCP.
- */
-static __inline__ int dn_before(__u16 seq1, __u16 seq2)
-{
- seq1 &= 0x0fff;
- seq2 &= 0x0fff;
-
- return (int)((seq1 - seq2) & 0x0fff) > 2048;
-}
-
-
-static __inline__ int dn_after(__u16 seq1, __u16 seq2)
-{
- seq1 &= 0x0fff;
- seq2 &= 0x0fff;
-
- return (int)((seq2 - seq1) & 0x0fff) > 2048;
-}
-
-static __inline__ int dn_equal(__u16 seq1, __u16 seq2)
-{
- return ((seq1 ^ seq2) & 0x0fff) == 0;
-}
-
-static __inline__ int dn_before_or_equal(__u16 seq1, __u16 seq2)
-{
- return (dn_before(seq1, seq2) || dn_equal(seq1, seq2));
-}
-
-static __inline__ void seq_add(__u16 *seq, __u16 off)
-{
- (*seq) += off;
- (*seq) &= 0x0fff;
-}
-
-static __inline__ int seq_next(__u16 seq1, __u16 seq2)
-{
- return dn_equal(seq1 + 1, seq2);
-}
-
-/*
- * Can we delay the ack ?
- */
-static __inline__ int sendack(__u16 seq)
-{
- return (int)((seq & 0x1000) ? 0 : 1);
-}
-
-/*
- * Is socket congested ?
- */
-static __inline__ int dn_congested(struct sock *sk)
-{
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
-}
-
-#define DN_MAX_NSP_DATA_HEADER (11)
-
-#endif /* _NET_DN_NSP_H */
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
deleted file mode 100644
index 88c0300236cc..000000000000
--- a/include/net/dn_route.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_DN_ROUTE_H
-#define _NET_DN_ROUTE_H
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/types.h>
-#include <net/dst.h>
-
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *,
- struct sock *sk, int flags);
-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-void dn_rt_cache_flush(int delay);
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
-
-/* Masks for flags field */
-#define DN_RT_F_PID 0x07 /* Mask for packet type */
-#define DN_RT_F_PF 0x80 /* Padding Follows */
-#define DN_RT_F_VER 0x40 /* Version =0 discard packet if ==1 */
-#define DN_RT_F_IE 0x20 /* Intra Ethernet, Reserved in short pkt */
-#define DN_RT_F_RTS 0x10 /* Packet is being returned to sender */
-#define DN_RT_F_RQR 0x08 /* Return packet to sender upon non-delivery */
-
-/* Mask for types of routing packets */
-#define DN_RT_PKT_MSK 0x06
-/* Types of routing packets */
-#define DN_RT_PKT_SHORT 0x02 /* Short routing packet */
-#define DN_RT_PKT_LONG 0x06 /* Long routing packet */
-
-/* Mask for control/routing selection */
-#define DN_RT_PKT_CNTL 0x01 /* Set to 1 if a control packet */
-/* Types of control packets */
-#define DN_RT_CNTL_MSK 0x0f /* Mask for control packets */
-#define DN_RT_PKT_INIT 0x01 /* Initialisation packet */
-#define DN_RT_PKT_VERI 0x03 /* Verification Message */
-#define DN_RT_PKT_HELO 0x05 /* Hello and Test Message */
-#define DN_RT_PKT_L1RT 0x07 /* Level 1 Routing Message */
-#define DN_RT_PKT_L2RT 0x09 /* Level 2 Routing Message */
-#define DN_RT_PKT_ERTH 0x0b /* Ethernet Router Hello */
-#define DN_RT_PKT_EEDH 0x0d /* Ethernet EndNode Hello */
-
-/* Values for info field in hello message */
-#define DN_RT_INFO_TYPE 0x03 /* Type mask */
-#define DN_RT_INFO_L1RT 0x02 /* L1 Router */
-#define DN_RT_INFO_L2RT 0x01 /* L2 Router */
-#define DN_RT_INFO_ENDN 0x03 /* EndNode */
-#define DN_RT_INFO_VERI 0x04 /* Verification Reqd. */
-#define DN_RT_INFO_RJCT 0x08 /* Reject Flag, Reserved */
-#define DN_RT_INFO_VFLD 0x10 /* Verification Failed, Reserved */
-#define DN_RT_INFO_NOML 0x20 /* No Multicast traffic accepted */
-#define DN_RT_INFO_BLKR 0x40 /* Blocking Requested */
-
-/*
- * The fl structure is what we used to look up the route.
- * The rt_saddr & rt_daddr entries are the same as key.saddr & key.daddr
- * except for local input routes, where the rt_saddr = fl.fld_dst and
- * rt_daddr = fl.fld_src to allow the route to be used for returning
- * packets to the originating host.
- */
-struct dn_route {
- struct dst_entry dst;
- struct dn_route __rcu *dn_next;
-
- struct neighbour *n;
-
- struct flowidn fld;
-
- __le16 rt_saddr;
- __le16 rt_daddr;
- __le16 rt_gateway;
- __le16 rt_local_src; /* Source used for forwarding packets */
- __le16 rt_src_map;
- __le16 rt_dst_map;
-
- unsigned int rt_flags;
- unsigned int rt_type;
-};
-
-static inline bool dn_is_input_route(struct dn_route *rt)
-{
- return rt->fld.flowidn_iif != 0;
-}
-
-static inline bool dn_is_output_route(struct dn_route *rt)
-{
- return rt->fld.flowidn_iif == 0;
-}
-
-void dn_route_init(void);
-void dn_route_cleanup(void);
-
-#include <net/sock.h>
-#include <linux/if_arp.h>
-
-static inline void dn_rt_send(struct sk_buff *skb)
-{
- dev_queue_xmit(skb);
-}
-
-static inline void dn_rt_finish_output(struct sk_buff *skb, char *dst, char *src)
-{
- struct net_device *dev = skb->dev;
-
- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
- dst = NULL;
-
- if (dev_hard_header(skb, dev, ETH_P_DNA_RT, dst, src, skb->len) >= 0)
- dn_rt_send(skb);
- else
- kfree_skb(skb);
-}
-
-#endif /* _NET_DN_ROUTE_H */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index b902b31bebce..f2ce12860546 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -559,6 +559,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
list_for_each_entry((_dp), &(_dst)->ports, list) \
if (dsa_port_is_user((_dp)))
+#define dsa_tree_for_each_cpu_port(_dp, _dst) \
+ list_for_each_entry((_dp), &(_dst)->ports, list) \
+ if (dsa_port_is_cpu((_dp)))
+
#define dsa_switch_for_each_port(_dp, _ds) \
list_for_each_entry((_dp), &(_ds)->dst->ports, list) \
if ((_dp)->ds == (_ds))
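
A minimal sketch of the new CPU-port iterator in use; the per-port helper is made up:

#include <net/dsa.h>

static void example_setup_cpu(struct dsa_port *dp)
{
	/* per-CPU-port configuration would go here */
}

static void example_setup_all_cpu_ports(struct dsa_switch *ds)
{
	struct dsa_port *dp;

	dsa_tree_for_each_cpu_port(dp, ds->dst)
		example_setup_cpu(dp);
}
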
diff --git a/include/net/gro.h b/include/net/gro.h
index 24003dea8fa4..a4fab706240d 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -160,6 +160,17 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
return skb->data + offset;
}
+static inline void *skb_gro_header(struct sk_buff *skb,
+ unsigned int hlen, unsigned int offset)
+{
+ void *ptr;
+
+ ptr = skb_gro_header_fast(skb, offset);
+ if (skb_gro_header_hard(skb, hlen))
+ ptr = skb_gro_header_slow(skb, hlen, offset);
+ return ptr;
+}
+
static inline void *skb_gro_network_header(struct sk_buff *skb)
{
return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
@@ -301,12 +312,9 @@ static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
return ptr;
}
- ptr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, off + plen)) {
- ptr = skb_gro_header_slow(skb, off + plen, off);
- if (!ptr)
- return NULL;
- }
+ ptr = skb_gro_header(skb, off + plen, off);
+ if (!ptr)
+ return NULL;
delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
start, offset);
@@ -329,12 +337,9 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
if (!grc->delta)
return;
- ptr = skb_gro_header_fast(skb, grc->offset);
- if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) {
- ptr = skb_gro_header_slow(skb, plen, grc->offset);
- if (!ptr)
- return;
- }
+ ptr = skb_gro_header(skb, plen, grc->offset);
+ if (!ptr)
+ return;
remcsum_unadjust((__sum16 *)ptr, grc->delta);
}
@@ -405,9 +410,7 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
off = skb_gro_offset(skb);
hlen = off + sizeof(*uh);
- uh = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen))
- uh = skb_gro_header_slow(skb, hlen, off);
+ uh = skb_gro_header(skb, hlen, off);
return uh;
}
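
The three call-site conversions above all follow the same shape; for a new GRO receive handler the combined helper would be used like this (the protocol header type is illustrative):

#include <net/gro.h>

struct example_hdr {
	__be16 type;
	__be16 len;
};

static struct example_hdr *example_gro_hdr(struct sk_buff *skb)
{
	unsigned int off = skb_gro_offset(skb);
	unsigned int hlen = off + sizeof(struct example_hdr);

	/* returns NULL if the header cannot be made available */
	return skb_gro_header(skb, hlen, off);
}
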
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ee88f0f1350f..c2b15f7e5516 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -25,6 +25,7 @@
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
+struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
@@ -57,6 +58,7 @@ struct inet_connection_sock_af_ops {
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
+ * @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
@@ -83,6 +85,7 @@ struct inet_connection_sock {
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
+ struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index e9cf2157ed8a..44a419b9e3d5 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -23,6 +23,7 @@
#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/tcp_states.h>
@@ -90,7 +91,28 @@ struct inet_bind_bucket {
struct hlist_head owners;
};
-static inline struct net *ib_net(struct inet_bind_bucket *ib)
+struct inet_bind2_bucket {
+ possible_net_t ib_net;
+ int l3mdev;
+ unsigned short port;
+ union {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr v6_rcv_saddr;
+#endif
+ __be32 rcv_saddr;
+ };
+ /* Node in the bhash2 inet_bind_hashbucket chain */
+ struct hlist_node node;
+ /* List of sockets hashed to this bucket */
+ struct hlist_head owners;
+};
+
+static inline struct net *ib_net(const struct inet_bind_bucket *ib)
+{
+ return read_pnet(&ib->ib_net);
+}
+
+static inline struct net *ib2_net(const struct inet_bind2_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
@@ -133,7 +155,14 @@ struct inet_hashinfo {
* TCP hash as well as the others for fast bind/connect.
*/
struct kmem_cache *bind_bucket_cachep;
+ /* This bind table is hashed by local port */
struct inet_bind_hashbucket *bhash;
+ struct kmem_cache *bind2_bucket_cachep;
+ /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4)
+ * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used
+ * primarily for expediting bind conflict resolution.
+ */
+ struct inet_bind_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
@@ -182,14 +211,61 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
+bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port, int l3mdev,
+ const struct sock *sk);
+
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
+ struct inet_bind2_bucket *tb);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(const struct inet_bind_hashbucket *head,
+ const struct net *net,
+ unsigned short port, int l3mdev,
+ const struct sock *sk);
+
+bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev, const struct sock *sk);
+
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
+static inline struct inet_bind_hashbucket *
+inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk,
+ const struct net *net, unsigned short port)
+{
+ u32 hash;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
+ else
+#endif
+ hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
+ return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
+}
+
+struct inet_bind_hashbucket *
+inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
+
+/* This should be called whenever a socket's sk_rcv_saddr (ipv4) or
+ * sk_v6_rcv_saddr (ipv6) changes after it has been bound. The socket's
+ * rcv_saddr field should already have been updated when this is called.
+ */
+int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk);
+
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum);
+ struct inet_bind2_bucket *tb2, unsigned short port);
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
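
A rough sketch of how the bhash2 helpers fit together when checking whether a (local address, port) pair already has a bucket; locking is shown only schematically, the real bind path also walks the bucket's owners list, and everything other than the helpers declared above is illustrative:

#include <net/inet_hashtables.h>
#include <net/tcp.h>

static bool example_port_addr_in_use(struct sock *sk, unsigned short port,
				     int l3mdev)
{
	struct net *net = sock_net(sk);
	struct inet_bind_hashbucket *head2;
	struct inet_bind2_bucket *tb2;

	head2 = inet_bhashfn_portaddr(&tcp_hashinfo, sk, net, port);

	spin_lock_bh(&head2->lock);
	tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	spin_unlock_bh(&head2->lock);

	return tb2 != NULL;
}
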
diff --git a/include/net/macsec.h b/include/net/macsec.h
index d6fa6b97f6ef..73780aa73644 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -14,7 +14,6 @@
#define MACSEC_DEFAULT_PN_LEN 4
#define MACSEC_XPN_PN_LEN 8
-#define MACSEC_SALT_LEN 12
#define MACSEC_NUM_AN 4 /* 2 bits for the association number */
typedef u64 __bitwise sci_t;
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3cd3a6e631aa..b2b9de70d9f4 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -86,10 +86,6 @@ extern spinlock_t nf_conntrack_expect_lock;
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
-#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
- (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \
- IS_ENABLED(CONFIG_NF_CT_NETLINK))
-
static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
{
if (timeout > INT_MAX)
@@ -101,6 +97,4 @@ int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off);
int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status);
-#endif
-
#endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 7a2a9d3144ba..e658d18afa67 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -741,6 +741,7 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
* @extack: extended ACK report struct
*
* See nla_parse()
@@ -760,6 +761,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
* @extack: extended ACK report struct
*
* See nla_parse_deprecated()
@@ -779,6 +781,7 @@ static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
* @extack: extended ACK report struct
*
* See nla_parse_deprecated_strict()
@@ -814,7 +817,6 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
* @len: length of attribute stream
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
- * @validate: validation strictness
* @extack: extended ACK report struct
*
* Validates all attributes in the specified attribute stream against the
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index b593f95e9991..02bbdc577f8e 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -24,9 +24,6 @@ struct netns_nf {
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
-#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
unsigned int defrag_ipv4_users;
#endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 3372a1f67cf4..29f65632ebc5 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -100,7 +100,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
struct netlink_ext_ack *extack);
int register_qdisc(struct Qdisc_ops *qops);
-int unregister_qdisc(struct Qdisc_ops *qops);
+void unregister_qdisc(struct Qdisc_ops *qops);
void qdisc_get_default(char *id, size_t len);
int qdisc_set_default(const char *id);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ec693fe7c553..f2958fb5ae08 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1137,7 +1137,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
static inline void qdisc_reset_queue(struct Qdisc *sch)
{
__qdisc_reset_queue(&sch->q);
- sch->qstats.backlog = 0;
}
static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
diff --git a/include/net/sock.h b/include/net/sock.h
index d08cfe190a78..ca469980e006 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -348,6 +348,7 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
+ * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -537,6 +538,7 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
+ struct hlist_node sk_bind2_node;
};
enum sk_pacing {
@@ -870,6 +872,16 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
+static inline void __sk_del_bind2_node(struct sock *sk)
+{
+ __hlist_del(&sk->sk_bind2_node);
+}
+
+static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
+{
+ hlist_add_head(&sk->sk_bind2_node, list);
+}
+
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -887,6 +899,8 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
+#define sk_for_each_bound_bhash2(__sk, list) \
+ hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 2edea901bbd5..2a7e18ee5577 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -1024,6 +1024,8 @@ void ocelot_deinit(struct ocelot *ocelot);
void ocelot_init_port(struct ocelot *ocelot, int port);
void ocelot_deinit_port(struct ocelot *ocelot, int port);
+void ocelot_port_setup_dsa_8021q_cpu(struct ocelot *ocelot, int cpu);
+void ocelot_port_teardown_dsa_8021q_cpu(struct ocelot *ocelot, int cpu);
void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port, int cpu);
void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port);
u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7bf9ba1329be..934a2a8beb87 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2573,10 +2573,12 @@ union bpf_attr {
* There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- * (room space is added or removed below the layer 2 header).
+ * (room space is added or removed between the layer 2 and
+ * layer 3 headers).
*
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * (room space is added or removed below the layer 3 header).
+ * (room space is added or removed between the layer 3 and
+ * layer 4 headers).
*
* The following flags are supported at this time:
*
@@ -3008,8 +3010,18 @@ union bpf_attr {
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
* **BPF_F_USER_BUILD_ID**
- * Collect buildid+offset instead of ips for user stack,
- * only valid if **BPF_F_USER_STACK** is also specified.
+ * Collect (build_id, file_offset) instead of ips for user
+ * stack, only valid if **BPF_F_USER_STACK** is also
+ * specified.
+ *
+ * *file_offset* is an offset relative to the beginning
+ * of the executable or shared object file backing the vma
+ * which the *ip* falls in. It is *not* an offset relative
+ * to that object's base address. Accordingly, it must be
+ * adjusted by adding (sh_addr - sh_offset), where
+ * sh_{addr,offset} correspond to the executable section
+ * containing *file_offset* in the object, for comparisons
+ * to symbols' st_value to be valid.
*
* **bpf_get_stack**\ () can collect up to
* **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
@@ -5331,6 +5343,18 @@ union bpf_attr {
* **-EACCES** if the SYN cookie is not valid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * u64 bpf_ktime_get_tai_ns(void)
+ * Description
+ * A nonsettable system-wide clock derived from wall-clock time but
+ * ignoring leap seconds. This clock does not experience
+ * discontinuities and backwards jumps caused by NTP inserting leap
+ * seconds as CLOCK_REALTIME does.
+ *
+ * See: **clock_gettime**\ (**CLOCK_TAI**)
+ * Return
+ * Current *ktime*.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5541,6 +5565,7 @@ union bpf_attr {
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
+ FN(ktime_get_tai_ns), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
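
A minimal BPF-side sketch using the new helper (libbpf-style C; assumes a libbpf recent enough that its generated helper definitions include bpf_ktime_get_tai_ns, and the section and program names are arbitrary):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/syscalls/sys_enter_nanosleep")
int sample_tai(void *ctx)
{
	__u64 now_tai = bpf_ktime_get_tai_ns();

	bpf_printk("TAI timestamp: %llu ns", now_tai);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
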
diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h
deleted file mode 100644
index 36ca71bd8bbe..000000000000
--- a/include/uapi/linux/dn.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_DN_H
-#define _LINUX_DN_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-/*
-
- DECnet Data Structures and Constants
-
-*/
-
-/*
- * DNPROTO_NSP can't be the same as SOL_SOCKET,
- * so increment each by one (compared to ULTRIX)
- */
-#define DNPROTO_NSP 2 /* NSP protocol number */
-#define DNPROTO_ROU 3 /* Routing protocol number */
-#define DNPROTO_NML 4 /* Net mgt protocol number */
-#define DNPROTO_EVL 5 /* Evl protocol number (usr) */
-#define DNPROTO_EVR 6 /* Evl protocol number (evl) */
-#define DNPROTO_NSPT 7 /* NSP trace protocol number */
-
-
-#define DN_ADDL 2
-#define DN_MAXADDL 2 /* ULTRIX headers have 20 here, but pathworks has 2 */
-#define DN_MAXOPTL 16
-#define DN_MAXOBJL 16
-#define DN_MAXACCL 40
-#define DN_MAXALIASL 128
-#define DN_MAXNODEL 256
-#define DNBUFSIZE 65023
-
-/*
- * SET/GET Socket options - must match the DSO_ numbers below
- */
-#define SO_CONDATA 1
-#define SO_CONACCESS 2
-#define SO_PROXYUSR 3
-#define SO_LINKINFO 7
-
-#define DSO_CONDATA 1 /* Set/Get connect data */
-#define DSO_DISDATA 10 /* Set/Get disconnect data */
-#define DSO_CONACCESS 2 /* Set/Get connect access data */
-#define DSO_ACCEPTMODE 4 /* Set/Get accept mode */
-#define DSO_CONACCEPT 5 /* Accept deferred connection */
-#define DSO_CONREJECT 6 /* Reject deferred connection */
-#define DSO_LINKINFO 7 /* Set/Get link information */
-#define DSO_STREAM 8 /* Set socket type to stream */
-#define DSO_SEQPACKET 9 /* Set socket type to sequenced packet */
-#define DSO_MAXWINDOW 11 /* Maximum window size allowed */
-#define DSO_NODELAY 12 /* Turn off nagle */
-#define DSO_CORK 13 /* Wait for more data! */
-#define DSO_SERVICES 14 /* NSP Services field */
-#define DSO_INFO 15 /* NSP Info field */
-#define DSO_MAX 15 /* Maximum option number */
-
-
-/* LINK States */
-#define LL_INACTIVE 0
-#define LL_CONNECTING 1
-#define LL_RUNNING 2
-#define LL_DISCONNECTING 3
-
-#define ACC_IMMED 0
-#define ACC_DEFER 1
-
-#define SDF_WILD 1 /* Wild card object */
-#define SDF_PROXY 2 /* Addr eligible for proxy */
-#define SDF_UICPROXY 4 /* Use uic-based proxy */
-
-/* Structures */
-
-
-struct dn_naddr {
- __le16 a_len;
- __u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */
-};
-
-struct sockaddr_dn {
- __u16 sdn_family;
- __u8 sdn_flags;
- __u8 sdn_objnum;
- __le16 sdn_objnamel;
- __u8 sdn_objname[DN_MAXOBJL];
- struct dn_naddr sdn_add;
-};
-#define sdn_nodeaddrl sdn_add.a_len /* Node address length */
-#define sdn_nodeaddr sdn_add.a_addr /* Node address */
-
-
-
-/*
- * DECnet set/get DSO_CONDATA, DSO_DISDATA (optional data) structure
- */
-struct optdata_dn {
- __le16 opt_status; /* Extended status return */
-#define opt_sts opt_status
- __le16 opt_optl; /* Length of user data */
- __u8 opt_data[16]; /* User data */
-};
-
-struct accessdata_dn {
- __u8 acc_accl;
- __u8 acc_acc[DN_MAXACCL];
- __u8 acc_passl;
- __u8 acc_pass[DN_MAXACCL];
- __u8 acc_userl;
- __u8 acc_user[DN_MAXACCL];
-};
-
-/*
- * DECnet logical link information structure
- */
-struct linkinfo_dn {
- __u16 idn_segsize; /* Segment size for link */
- __u8 idn_linkstate; /* Logical link state */
-};
-
-/*
- * Ethernet address format (for DECnet)
- */
-union etheraddress {
- __u8 dne_addr[ETH_ALEN]; /* Full ethernet address */
- struct {
- __u8 dne_hiord[4]; /* DECnet HIORD prefix */
- __u8 dne_nodeaddr[2]; /* DECnet node address */
- } dne_remote;
-};
-
-
-/*
- * DECnet physical socket address format
- */
-struct dn_addr {
- __le16 dna_family; /* AF_DECnet */
- union etheraddress dna_netaddr; /* DECnet ethernet address */
-};
-
-#define DECNET_IOCTL_BASE 0x89 /* PROTOPRIVATE range */
-
-#define SIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, struct dn_naddr)
-#define SIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, struct dn_naddr)
-#define OSIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, int)
-#define OSIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, int)
-
-#endif /* _LINUX_DN_H */
diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
index 3af2aa069a36..d5b6d1f37353 100644
--- a/include/uapi/linux/if_macsec.h
+++ b/include/uapi/linux/if_macsec.h
@@ -22,6 +22,8 @@
#define MACSEC_KEYID_LEN 16
+#define MACSEC_SALT_LEN 12
+
/* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */
#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL
#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL
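
The new MACSEC_SALT_LEN export gives user space the size of the per-SA salt used by the MACsec XPN cipher suites, so buffers can be sized from the UAPI header instead of a hard-coded 12. A minimal user-space sketch (the struct is hypothetical; only the array sizes come from the header):

	#include <linux/if_macsec.h>

	/* hypothetical key-material container for one secure association */
	struct xpn_sa_key {
		unsigned char key[MACSEC_MAX_KEY_LEN];
		unsigned char key_id[MACSEC_KEYID_LEN];
		unsigned char salt[MACSEC_SALT_LEN];	/* 12 bytes, per the new define */
	};
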
diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h
deleted file mode 100644
index 3c77f54560f2..000000000000
--- a/include/uapi/linux/netfilter_decnet.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef __LINUX_DECNET_NETFILTER_H
-#define __LINUX_DECNET_NETFILTER_H
-
-/* DECnet-specific defines for netfilter.
- * This file (C) Steve Whitehouse 1999 derived from the
- * ipv4 netfilter header file which is
- * (C)1998 Rusty Russell -- This code is GPL.
- */
-
-#include <linux/netfilter.h>
-
-/* only for userspace compatibility */
-#ifndef __KERNEL__
-
-#include <limits.h> /* for INT_MIN, INT_MAX */
-
-/* kernel define is in netfilter_defs.h */
-#define NF_DN_NUMHOOKS 7
-#endif /* ! __KERNEL__ */
-
-/* DECnet Hooks */
-/* After promisc drops, checksum checks. */
-#define NF_DN_PRE_ROUTING 0
-/* If the packet is destined for this box. */
-#define NF_DN_LOCAL_IN 1
-/* If the packet is destined for another interface. */
-#define NF_DN_FORWARD 2
-/* Packets coming from a local process. */
-#define NF_DN_LOCAL_OUT 3
-/* Packets about to hit the wire. */
-#define NF_DN_POST_ROUTING 4
-/* Input Hello Packets */
-#define NF_DN_HELLO 5
-/* Input Routing Packets */
-#define NF_DN_ROUTE 6
-
-enum nf_dn_hook_priorities {
- NF_DN_PRI_FIRST = INT_MIN,
- NF_DN_PRI_CONNTRACK = -200,
- NF_DN_PRI_MANGLE = -150,
- NF_DN_PRI_NAT_DST = -100,
- NF_DN_PRI_FILTER = 0,
- NF_DN_PRI_NAT_SRC = 100,
- NF_DN_PRI_DNRTMSG = 200,
- NF_DN_PRI_LAST = INT_MAX,
-};
-
-struct nf_dn_rtmsg {
- int nfdn_ifindex;
-};
-
-#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)))
-
-#ifndef __KERNEL__
-/* backwards compatibility for userspace */
-#define DNRMG_L1_GROUP 0x01
-#define DNRMG_L2_GROUP 0x02
-#endif
-
-enum {
- DNRNG_NLGRP_NONE,
-#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE
- DNRNG_NLGRP_L1,
-#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1
- DNRNG_NLGRP_L2,
-#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2
- __DNRNG_NLGRP_MAX
-};
-#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1)
-
-#endif /*__LINUX_DECNET_NETFILTER_H*/
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 855dffb4c1c3..e0ab261ceca2 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -20,7 +20,7 @@
#define NETLINK_CONNECTOR 11
#define NETLINK_NETFILTER 12 /* netfilter subsystem */
#define NETLINK_IP6_FW 13
-#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages (obsolete) */
#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
#define NETLINK_GENERIC 16
/* leave room for NETLINK_DM (DM Events) */
@@ -41,12 +41,20 @@ struct sockaddr_nl {
__u32 nl_groups; /* multicast groups mask */
};
+/**
+ * struct nlmsghdr - fixed format metadata header of Netlink messages
+ * @nlmsg_len: Length of message including header
+ * @nlmsg_type: Message content type
+ * @nlmsg_flags: Additional flags
+ * @nlmsg_seq: Sequence number
+ * @nlmsg_pid: Sending process port ID
+ */
struct nlmsghdr {
- __u32 nlmsg_len; /* Length of message including header */
- __u16 nlmsg_type; /* Message content */
- __u16 nlmsg_flags; /* Additional flags */
- __u32 nlmsg_seq; /* Sequence number */
- __u32 nlmsg_pid; /* Sending process port ID */
+ __u32 nlmsg_len;
+ __u16 nlmsg_type;
+ __u16 nlmsg_flags;
+ __u32 nlmsg_seq;
+ __u32 nlmsg_pid;
};
/* Flags values */
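
Note that this hunk only moves the field descriptions into kernel-doc; the layout and meaning of struct nlmsghdr are unchanged. For reference, a user-space sketch that fills the header the way the kernel-doc describes (nl_fill_hdr() is an illustrative helper, not a kernel or libnl API):

	#include <linux/netlink.h>
	#include <string.h>

	static void nl_fill_hdr(struct nlmsghdr *nlh, __u16 type, __u32 seq,
				__u32 payload_len)
	{
		memset(nlh, 0, sizeof(*nlh));
		nlh->nlmsg_len = NLMSG_LENGTH(payload_len); /* length including header */
		nlh->nlmsg_type = type;			    /* message content type */
		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		nlh->nlmsg_seq = seq;
		nlh->nlmsg_pid = 0;	/* port ID; 0 is fine for requests */
	}
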
@@ -337,6 +345,9 @@ enum netlink_attribute_type {
* bitfield32 type (U32)
* @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
* @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
+ *
+ * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes
+ * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number
*/
enum netlink_policy_type_attr {
NL_POLICY_TYPE_ATTR_UNSPEC,
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 24b755eca0b3..97bb57493ed5 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -202,6 +202,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
}
stop:
offs = seq->count;
+ if (IS_ERR(p)) {
+ seq->op->stop(seq, NULL);
+ err = PTR_ERR(p);
+ goto done;
+ }
/* bpf program called if !p */
seq->op->stop(seq, p);
if (!p) {
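
The added IS_ERR() branch matters because a seq_operations start()/next() callback may return an ERR_PTR(); the bpf_iter wrapper must not feed that error pointer into the stop() path (which can invoke the BPF program), so it now calls stop(seq, NULL) and returns the error to the reader. A hypothetical start() showing the convention (lookup_object() and object_busy() are stand-ins):

	#include <linux/seq_file.h>
	#include <linux/err.h>

	static void *example_seq_start(struct seq_file *seq, loff_t *pos)
	{
		void *obj = lookup_object(*pos);	/* hypothetical lookup */

		if (!obj)
			return NULL;			/* clean end of iteration */
		if (object_busy(obj))			/* hypothetical failure */
			return ERR_PTR(-EAGAIN);	/* handled by the new branch */
		return obj;
	}
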
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 8ce40fd869f6..4ee2e7286c23 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -582,7 +582,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
synchronize_rcu();
kvfree(smap->buckets);
- kfree(smap);
+ bpf_map_area_free(smap);
}
int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
@@ -610,7 +610,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
unsigned int i;
u32 nbuckets;
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
+ smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
@@ -623,7 +623,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap->buckets) {
- kfree(smap);
+ bpf_map_area_free(smap);
return ERR_PTR(-ENOMEM);
}
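
This kzalloc()/kfree() to bpf_map_area_alloc()/bpf_map_area_free() conversion repeats for the other map types in this series (cpumap, devmap, hashtab, lpm_trie, offload, ringbuf, cgroup local storage, sock_map/sock_hash), keeping the allocation flags, zeroing and kmalloc-vs-vmalloc fallback for map metadata in one helper. A condensed sketch of the pattern for a hypothetical map type:

	#include <linux/bpf.h>

	struct example_map {
		struct bpf_map map;
		/* ... per-map state ... */
	};

	static struct bpf_map *example_map_alloc(union bpf_attr *attr)
	{
		struct example_map *emap;

		emap = bpf_map_area_alloc(sizeof(*emap), NUMA_NO_NODE);
		if (!emap)
			return ERR_PTR(-ENOMEM);

		bpf_map_init_from_attr(&emap->map, attr);
		return &emap->map;
	}

	static void example_map_free(struct bpf_map *map)
	{
		struct example_map *emap = container_of(map, struct example_map, map);

		bpf_map_area_free(emap);	/* always pairs with bpf_map_area_alloc() */
	}
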
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7e64447659f3..903719b89238 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5864,26 +5864,25 @@ again:
}
static int __get_type_size(struct btf *btf, u32 btf_id,
- const struct btf_type **bad_type)
+ const struct btf_type **ret_type)
{
const struct btf_type *t;
+ *ret_type = btf_type_by_id(btf, 0);
if (!btf_id)
/* void */
return 0;
t = btf_type_by_id(btf, btf_id);
while (t && btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (!t) {
- *bad_type = btf_type_by_id(btf, 0);
+ if (!t)
return -EINVAL;
- }
+ *ret_type = t;
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
if (btf_type_is_int(t) || btf_is_any_enum(t))
return t->size;
- *bad_type = t;
return -EINVAL;
}
@@ -6175,6 +6174,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
bool rel = false, kptr_get = false, trusted_arg = false;
+ bool sleepable = false;
struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
@@ -6212,6 +6212,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
rel = kfunc_flags & KF_RELEASE;
kptr_get = kfunc_flags & KF_KPTR_GET;
trusted_arg = kfunc_flags & KF_TRUSTED_ARGS;
+ sleepable = kfunc_flags & KF_SLEEPABLE;
}
/* check that BTF function arguments match actual types that the
@@ -6419,6 +6420,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
func_name);
return -EINVAL;
}
+
+ if (sleepable && !env->prog->aux->sleepable) {
+ bpf_log(log, "kernel function %s is sleepable but the program is not\n",
+ func_name);
+ return -EINVAL;
+ }
+
/* returns argument register number > 0 in case of reference release kfunc */
return rel ? ref_regno : 0;
}
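
KF_SLEEPABLE lets a kfunc declare that it may sleep, and the check added above makes the verifier reject calls to such a kfunc from non-sleepable programs. A registration sketch for a hypothetical module kfunc (all names here are illustrative):

	#include <linux/module.h>
	#include <linux/bpf.h>
	#include <linux/btf.h>
	#include <linux/btf_ids.h>

	noinline int bpf_example_sleepable_kfunc(void)
	{
		might_sleep();		/* this helper may block */
		return 0;
	}

	BTF_SET8_START(example_kfunc_ids)
	BTF_ID_FLAGS(func, bpf_example_sleepable_kfunc, KF_SLEEPABLE)
	BTF_SET8_END(example_kfunc_ids)

	static const struct btf_kfunc_id_set example_kfunc_set = {
		.owner	= THIS_MODULE,
		.set	= &example_kfunc_ids,
	};

	static int __init example_kfunc_init(void)
	{
		return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
						 &example_kfunc_set);
	}
	late_initcall(example_kfunc_init);
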
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1e10d088dbb..639437f36928 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2623,6 +2623,7 @@ const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_tai_ns_proto __weak;
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index f4860ac756cd..b5ba34ddd4b6 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -97,7 +97,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
- cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_ACCOUNT);
+ cmap = bpf_map_area_alloc(sizeof(*cmap), NUMA_NO_NODE);
if (!cmap)
return ERR_PTR(-ENOMEM);
@@ -118,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
return &cmap->map;
free_cmap:
- kfree(cmap);
+ bpf_map_area_free(cmap);
return ERR_PTR(err);
}
@@ -623,7 +623,7 @@ static void cpu_map_free(struct bpf_map *map)
__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
}
bpf_map_area_free(cmap->cpu_map);
- kfree(cmap);
+ bpf_map_area_free(cmap);
}
/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a0e02b009487..f9a87dcc5535 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -163,13 +163,13 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
- dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT);
+ dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE);
if (!dtab)
return ERR_PTR(-ENOMEM);
err = dev_map_init_map(dtab, attr);
if (err) {
- kfree(dtab);
+ bpf_map_area_free(dtab);
return ERR_PTR(err);
}
@@ -240,7 +240,7 @@ static void dev_map_free(struct bpf_map *map)
bpf_map_area_free(dtab->netdev_map);
}
- kfree(dtab);
+ bpf_map_area_free(dtab);
}
static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6c530a5e560a..b301a63afa2f 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -491,7 +491,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
struct bpf_htab *htab;
int err, i;
- htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
+ htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -575,7 +575,7 @@ free_map_locked:
bpf_map_area_free(htab->buckets);
free_htab:
lockdep_unregister_key(&htab->lockdep_key);
- kfree(htab);
+ bpf_map_area_free(htab);
return ERR_PTR(err);
}
@@ -1492,7 +1492,7 @@ static void htab_map_free(struct bpf_map *map)
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
free_percpu(htab->map_locked[i]);
lockdep_unregister_key(&htab->lockdep_key);
- kfree(htab);
+ bpf_map_area_free(htab);
}
static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1f961f9982d2..3c1b9bbcf971 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -198,6 +198,18 @@ const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
.ret_type = RET_INTEGER,
};
+BPF_CALL_0(bpf_ktime_get_tai_ns)
+{
+ /* NMI safe access to clock tai */
+ return ktime_get_tai_fast_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
+ .func = bpf_ktime_get_tai_ns,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
BPF_CALL_0(bpf_get_current_pid_tgid)
{
struct task_struct *task = current;
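
bpf_ktime_get_tai_ns() exposes the new NMI-safe CLOCK_TAI accessor (ktime_get_tai_fast_ns()) to BPF, and the bpf_base_func_proto() hunk below adds it to the base helper set. A BPF-side sketch, assuming a libbpf build whose helper definitions were regenerated from this tree:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("tracepoint/syscalls/sys_enter_nanosleep")
	int log_tai(void *ctx)
	{
		__u64 tai = bpf_ktime_get_tai_ns();	/* CLOCK_TAI, nanoseconds */

		bpf_printk("tai=%llu", tai);
		return 0;
	}
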
@@ -1617,6 +1629,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_ktime_get_boot_ns:
return &bpf_ktime_get_boot_ns_proto;
+ case BPF_FUNC_ktime_get_tai_ns:
+ return &bpf_ktime_get_tai_ns_proto;
case BPF_FUNC_ringbuf_output:
return &bpf_ringbuf_output_proto;
case BPF_FUNC_ringbuf_reserve:
@@ -1711,3 +1725,21 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return NULL;
}
}
+
+BTF_SET8_START(tracing_btf_ids)
+#ifdef CONFIG_KEXEC_CORE
+BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
+#endif
+BTF_SET8_END(tracing_btf_ids)
+
+static const struct btf_kfunc_id_set tracing_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &tracing_btf_ids,
+};
+
+static int __init kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tracing_kfunc_set);
+}
+
+late_initcall(kfunc_init);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 49ef0ce040c7..098cf336fae6 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -313,8 +313,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
- map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
- __GFP_ZERO | GFP_USER | __GFP_ACCOUNT, numa_node);
+ map = bpf_map_area_alloc(sizeof(struct bpf_cgroup_storage_map), numa_node);
if (!map)
return ERR_PTR(-ENOMEM);
@@ -346,7 +345,7 @@ static void cgroup_storage_map_free(struct bpf_map *_map)
WARN_ON(!RB_EMPTY_ROOT(&map->root));
WARN_ON(!list_empty(&map->list));
- kfree(map);
+ bpf_map_area_free(map);
}
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index d789e3b831ad..d833496e9e42 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -558,7 +558,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
attr->value_size > LPM_VAL_SIZE_MAX)
return ERR_PTR(-EINVAL);
- trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
+ trie = bpf_map_area_alloc(sizeof(*trie), NUMA_NO_NODE);
if (!trie)
return ERR_PTR(-ENOMEM);
@@ -609,7 +609,7 @@ static void trie_free(struct bpf_map *map)
}
out:
- kfree(trie);
+ bpf_map_area_free(trie);
}
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index bd09290e3648..13e4efc971e6 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -372,7 +372,7 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
attr->map_type != BPF_MAP_TYPE_HASH)
return ERR_PTR(-EINVAL);
- offmap = kzalloc(sizeof(*offmap), GFP_USER);
+ offmap = bpf_map_area_alloc(sizeof(*offmap), NUMA_NO_NODE);
if (!offmap)
return ERR_PTR(-ENOMEM);
@@ -404,7 +404,7 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
err_unlock:
up_write(&bpf_devs_lock);
rtnl_unlock();
- kfree(offmap);
+ bpf_map_area_free(offmap);
return ERR_PTR(err);
}
@@ -428,7 +428,7 @@ void bpf_map_offload_map_free(struct bpf_map *map)
up_write(&bpf_devs_lock);
rtnl_unlock();
- kfree(offmap);
+ bpf_map_area_free(offmap);
}
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index a1c0794ae49d..8a5e060de63b 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -78,8 +78,6 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
if (!qs)
return ERR_PTR(-ENOMEM);
- memset(qs, 0, sizeof(*qs));
-
bpf_map_init_from_attr(&qs->map, attr);
qs->size = size;
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index ded4faeca192..b483aea35f41 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -116,7 +116,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
err_free_pages:
for (i = 0; i < nr_pages; i++)
__free_page(pages[i]);
- kvfree(pages);
+ bpf_map_area_free(pages);
return NULL;
}
@@ -164,7 +164,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
#endif
- rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_ACCOUNT);
+ rb_map = bpf_map_area_alloc(sizeof(*rb_map), NUMA_NO_NODE);
if (!rb_map)
return ERR_PTR(-ENOMEM);
@@ -172,7 +172,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
if (!rb_map->rb) {
- kfree(rb_map);
+ bpf_map_area_free(rb_map);
return ERR_PTR(-ENOMEM);
}
@@ -190,7 +190,7 @@ static void bpf_ringbuf_free(struct bpf_ringbuf *rb)
vunmap(rb);
for (i = 0; i < nr_pages; i++)
__free_page(pages[i]);
- kvfree(pages);
+ bpf_map_area_free(pages);
}
static void ringbuf_map_free(struct bpf_map *map)
@@ -199,7 +199,7 @@ static void ringbuf_map_free(struct bpf_map *map)
rb_map = container_of(map, struct bpf_ringbuf_map, map);
bpf_ringbuf_free(rb_map->rb);
- kfree(rb_map);
+ bpf_map_area_free(rb_map);
}
static void *ringbuf_map_lookup_elem(struct bpf_map *map, void *key)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 096fdac70165..2c1f8069f7b7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -427,6 +427,7 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env,
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
+ type = base_type(type);
return type == PTR_TO_PACKET ||
type == PTR_TO_PACKET_META;
}
@@ -456,10 +457,9 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
- return base_type(type) == PTR_TO_SOCKET ||
- base_type(type) == PTR_TO_TCP_SOCK ||
- base_type(type) == PTR_TO_MEM ||
- base_type(type) == PTR_TO_BTF_ID;
+ type = base_type(type);
+ return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_MEM || type == PTR_TO_BTF_ID;
}
static bool type_is_rdonly_mem(u32 type)
@@ -467,25 +467,11 @@ static bool type_is_rdonly_mem(u32 type)
return type & MEM_RDONLY;
}
-static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_SOCK_COMMON;
-}
-
static bool type_may_be_null(u32 type)
{
return type & PTR_MAYBE_NULL;
}
-static bool may_be_acquire_function(enum bpf_func_id func_id)
-{
- return func_id == BPF_FUNC_sk_lookup_tcp ||
- func_id == BPF_FUNC_sk_lookup_udp ||
- func_id == BPF_FUNC_skc_lookup_tcp ||
- func_id == BPF_FUNC_map_lookup_elem ||
- func_id == BPF_FUNC_ringbuf_reserve;
-}
-
static bool is_acquire_function(enum bpf_func_id func_id,
const struct bpf_map *map)
{
@@ -518,6 +504,26 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
+static bool is_dynptr_ref_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_dynptr_data;
+}
+
+static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
+ const struct bpf_map *map)
+{
+ int ref_obj_uses = 0;
+
+ if (is_ptr_cast_function(func_id))
+ ref_obj_uses++;
+ if (is_acquire_function(func_id, map))
+ ref_obj_uses++;
+ if (is_dynptr_ref_function(func_id))
+ ref_obj_uses++;
+
+ return ref_obj_uses > 1;
+}
+
static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
return BPF_CLASS(insn->code) == BPF_STX &&
@@ -6453,33 +6459,6 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true;
}
-static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
-{
- int count = 0;
-
- if (arg_type_may_be_refcounted(fn->arg1_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg2_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg3_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg4_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg5_type))
- count++;
-
- /* A reference acquiring function cannot acquire
- * another refcounted ptr.
- */
- if (may_be_acquire_function(func_id) && count)
- return false;
-
- /* We only support one arg being unreferenced at the moment,
- * which is sufficient for the helper functions we have right now.
- */
- return count <= 1;
-}
-
static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
int i;
@@ -6498,13 +6477,11 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
return true;
}
-static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
- struct bpf_call_arg_meta *meta)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
- check_btf_id_ok(fn) &&
- check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
+ check_btf_id_ok(fn) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -7218,7 +7195,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn, func_id, &meta);
+ err = check_func_proto(fn, func_id);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -7343,6 +7320,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
}
break;
+ case BPF_FUNC_dynptr_data:
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
+ if (arg_type_is_dynptr(fn->arg_type[i])) {
+ if (meta.ref_obj_id) {
+ verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
+ return -EFAULT;
+ }
+ /* Find the id of the dynptr we're tracking the reference of */
+ meta.ref_obj_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
+ break;
+ }
+ }
+ if (i == MAX_BPF_FUNC_REG_ARGS) {
+ verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
+ return -EFAULT;
+ }
+ break;
}
if (err)
@@ -7359,13 +7353,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* update return register (already marked as written above) */
ret_type = fn->ret_type;
- ret_flag = type_flag(fn->ret_type);
- if (ret_type == RET_INTEGER) {
+ ret_flag = type_flag(ret_type);
+
+ switch (base_type(ret_type)) {
+ case RET_INTEGER:
/* sets type to SCALAR_VALUE */
mark_reg_unknown(env, regs, BPF_REG_0);
- } else if (ret_type == RET_VOID) {
+ break;
+ case RET_VOID:
regs[BPF_REG_0].type = NOT_INIT;
- } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
+ break;
+ case RET_PTR_TO_MAP_VALUE:
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
/* remember map_ptr, so that check_map_access()
@@ -7384,20 +7382,26 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
map_value_has_spin_lock(meta.map_ptr)) {
regs[BPF_REG_0].id = ++env->id_gen;
}
- } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
+ break;
+ case RET_PTR_TO_SOCKET:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
- } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
+ break;
+ case RET_PTR_TO_SOCK_COMMON:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
- } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
+ break;
+ case RET_PTR_TO_TCP_SOCK:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
- } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
+ break;
+ case RET_PTR_TO_ALLOC_MEM:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].mem_size = meta.mem_size;
- } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
+ break;
+ case RET_PTR_TO_MEM_OR_BTF_ID:
+ {
const struct btf_type *t;
mark_reg_known_zero(env, regs, BPF_REG_0);
@@ -7429,7 +7433,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].btf = meta.ret_btf;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
- } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
+ break;
+ }
+ case RET_PTR_TO_BTF_ID:
+ {
struct btf *ret_btf;
int ret_btf_id;
@@ -7450,7 +7457,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
- } else {
+ break;
+ }
+ default:
verbose(env, "unknown return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id), func_id);
return -EINVAL;
@@ -7459,7 +7468,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (type_may_be_null(regs[BPF_REG_0].type))
regs[BPF_REG_0].id = ++env->id_gen;
- if (is_ptr_cast_function(func_id)) {
+ if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
+ verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
+ func_id_name(func_id), func_id);
+ return -EFAULT;
+ }
+
+ if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
} else if (is_acquire_function(func_id, meta.map_ptr)) {
@@ -7471,21 +7486,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
- } else if (func_id == BPF_FUNC_dynptr_data) {
- int dynptr_id = 0, i;
-
- /* Find the id of the dynptr we're acquiring a reference to */
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- if (arg_type_is_dynptr(fn->arg_type[i])) {
- if (dynptr_id) {
- verbose(env, "verifier internal error: multiple dynptr args in func\n");
- return -EFAULT;
- }
- dynptr_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
- }
- }
- /* For release_reference() */
- regs[BPF_REG_0].ref_obj_id = dynptr_id;
}
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
@@ -7584,6 +7584,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
func_name);
return -EACCES;
}
+ if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) {
+ verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n");
+ return -EACCES;
+ }
+
acq = *kfunc_flags & KF_ACQUIRE;
/* Check the arguments */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e4bb5d57f4d1..2ddae5743d53 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6164,11 +6164,6 @@ static struct cgroup *cgroup_get_from_file(struct file *f)
return ERR_CAST(css);
cgrp = css->cgroup;
- if (!cgroup_on_dfl(cgrp)) {
- cgroup_put(cgrp);
- return ERR_PTR(-EBADF);
- }
-
return cgrp;
}
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 5aa8144101dc..0beb44f2fe1f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -467,12 +467,9 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
off_vlan = skb_gro_offset(skb);
hlen = off_vlan + sizeof(*vhdr);
- vhdr = skb_gro_header_fast(skb, off_vlan);
- if (skb_gro_header_hard(skb, hlen)) {
- vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
- if (unlikely(!vhdr))
- goto out;
- }
+ vhdr = skb_gro_header(skb, hlen, off_vlan);
+ if (unlikely(!vhdr))
+ goto out;
type = vhdr->h_vlan_encapsulated_proto;
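
skb_gro_header() is a new wrapper that folds the skb_gro_header_fast()/skb_gro_header_slow() sequence into one call that returns NULL when the requested header cannot be pulled. A sketch of the same pattern in a hypothetical GRO receive handler (struct examplehdr is a stand-in):

	#include <net/gro.h>

	struct examplehdr {
		__be16 proto;
	};

	static struct sk_buff *example_gro_receive(struct list_head *head,
						   struct sk_buff *skb)
	{
		unsigned int off = skb_gro_offset(skb);
		unsigned int hlen = off + sizeof(struct examplehdr);
		const struct examplehdr *hdr;

		hdr = skb_gro_header(skb, hlen, off);
		if (unlikely(!hdr))
			return NULL;	/* header unavailable, skip GRO for this skb */

		/* ... match hdr against the packets queued on 'head' ... */
		return NULL;
	}
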
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 035812b0461c..e1bb41a443c4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -674,9 +674,9 @@ static int vlan_ethtool_get_link_ksettings(struct net_device *dev,
static void vlan_ethtool_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, vlan_fullname, sizeof(info->driver));
- strlcpy(info->version, vlan_version, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->driver, vlan_fullname, sizeof(info->driver));
+ strscpy(info->version, vlan_version, sizeof(info->version));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
}
static int vlan_ethtool_get_ts_info(struct net_device *dev,
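
The strlcpy() to strscpy() conversions here and in the following files are mechanical: strscpy() still copies at most size-1 bytes and NUL-terminates, but it reads no more than size bytes of the source (strlcpy() must walk the whole source string to compute its return value) and reports truncation as -E2BIG instead of the would-be length. A sketch of a caller that actually checks the result:

	#include <linux/netdevice.h>
	#include <linux/string.h>

	static int copy_ifname(char *dst, const char *src)
	{
		ssize_t n = strscpy(dst, src, IFNAMSIZ);

		if (n < 0)	/* -E2BIG: src did not fit, dst holds a truncated copy */
			return -EINVAL;
		return 0;
	}
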
diff --git a/net/Kconfig b/net/Kconfig
index 6b78f695caa6..48c33c222199 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -204,7 +204,6 @@ config BRIDGE_NETFILTER
source "net/netfilter/Kconfig"
source "net/ipv4/netfilter/Kconfig"
source "net/ipv6/netfilter/Kconfig"
-source "net/decnet/netfilter/Kconfig"
source "net/bridge/netfilter/Kconfig"
endif
@@ -221,7 +220,6 @@ source "net/802/Kconfig"
source "net/bridge/Kconfig"
source "net/dsa/Kconfig"
source "net/8021q/Kconfig"
-source "net/decnet/Kconfig"
source "net/llc/Kconfig"
source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index fbfeb8a0bb37..6a62e5b27378 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_AF_KCM) += kcm/
obj-$(CONFIG_STREAM_PARSER) += strparser/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_L2TP) += l2tp/
-obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_PHONET) += phonet/
ifneq ($(CONFIG_VLAN_8021Q),)
obj-y += 8021q/
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d82a51e69386..6b4c25a92377 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -778,7 +778,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname,
ax25_dev = ax25->ax25_dev;
if (ax25_dev != NULL && ax25_dev->dev != NULL) {
- strlcpy(devname, ax25_dev->dev->name, sizeof(devname));
+ strscpy(devname, ax25_dev->dev->name, sizeof(devname));
length = strlen(devname) + 1;
} else {
*devname = '\0';
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index d11209367dd0..25d8ecf105aa 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -695,6 +695,10 @@ noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
{
}
+noinline void bpf_kfunc_call_test_destructive(void)
+{
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
@@ -719,6 +723,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
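
bpf_kfunc_call_test_destructive() exists so selftests can exercise the new KF_DESTRUCTIVE handling; because of the verifier check added earlier in this series, loading a caller additionally requires CAP_SYS_BOOT. A selftest-style BPF program sketch (assumes the usual selftests/libbpf build setup):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	extern void bpf_kfunc_call_test_destructive(void) __ksym;

	char LICENSE[] SEC("license") = "GPL";

	SEC("tc")
	int kfunc_destructive_test(void *ctx)
	{
		bpf_kfunc_call_test_destructive();
		return 0;
	}
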
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 58a4f70e01e3..b82906fc999a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -251,10 +251,10 @@ static int br_set_mac_address(struct net_device *dev, void *p)
static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, "bridge", sizeof(info->driver));
- strlcpy(info->version, BR_VERSION, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, "N/A", sizeof(info->bus_info));
+ strscpy(info->driver, "bridge", sizeof(info->driver));
+ strscpy(info->version, BR_VERSION, sizeof(info->version));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->bus_info, "N/A", sizeof(info->bus_info));
}
static int br_get_link_ksettings(struct net_device *dev,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a84a7cfb9d6d..efbd93e92ce2 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -568,26 +568,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
!is_valid_ether_addr(dev->dev_addr))
return -EINVAL;
- /* Also don't allow bridging of net devices that are DSA masters, since
- * the bridge layer rx_handler prevents the DSA fake ethertype handler
- * to be invoked, so we don't get the chance to strip off and parse the
- * DSA switch tag protocol header (the bridge layer just returns
- * RX_HANDLER_CONSUMED, stopping RX processing for these frames).
- * The only case where that would not be an issue is when bridging can
- * already be offloaded, such as when the DSA master is itself a DSA
- * or plain switchdev port, and is bridged only with other ports from
- * the same hardware device.
- */
- if (netdev_uses_dsa(dev)) {
- list_for_each_entry(p, &br->port_list, list) {
- if (!netdev_port_same_parent_id(dev, p->dev)) {
- NL_SET_ERR_MSG(extack,
- "Cannot do software bridging with a DSA master");
- return -EINVAL;
- }
- }
- }
-
/* No bridging of bridges */
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
NL_SET_ERR_MSG(extack,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 07fa76080512..74fdd8105dca 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -384,7 +384,7 @@ int br_sysfs_addif(struct net_bridge_port *p)
return err;
}
- strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+ strscpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
}
@@ -406,7 +406,7 @@ int br_sysfs_renameif(struct net_bridge_port *p)
netdev_notice(br->dev, "unable to rename link %s to %s",
p->sysfs_name, p->dev->name);
else
- strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+ strscpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
return err;
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9a0ae59cdc50..8f6639e095a0 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1440,7 +1440,7 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
/* ebtables expects 31 bytes long names but xt_match names are 29 bytes
* long. Copy 29 bytes and fill remaining bytes with zeroes.
*/
- strlcpy(name, _name, sizeof(name));
+ strscpy(name, _name, sizeof(name));
if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) ||
put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) ||
put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) ||
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 52dd0b6835bc..6a0cba4fc29f 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -342,7 +342,7 @@ int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
- strlcpy(caifd->layer.name, dev->name,
+ strscpy(caifd->layer.name, dev->name,
sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
res = cfcnfg_add_phy_layer(cfg,
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 4be6b04879a1..ebc202ffdd8d 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -184,7 +184,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
dev_add_pack(&caif_usb_type);
pack_added = true;
- strlcpy(layer->name, dev->name, sizeof(layer->name));
+ strscpy(layer->name, dev->name, sizeof(layer->name));
return 0;
err:
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 23267c8db7c4..52509e185960 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,14 +268,14 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
- strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+ strscpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
- strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+ strscpy(l->u.utility.name, s->sockaddr.u.util.service,
sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 2809cbd6b7f7..cc405d8c7c30 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,7 +258,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
- strlcpy(utility_name, param->u.utility.name,
+ strscpy(utility_name, param->u.utility.name,
UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
diff --git a/net/core/dev.c b/net/core/dev.c
index 56c8b0921c9f..d66c73c1c734 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1100,7 +1100,7 @@ static int dev_alloc_name_ns(struct net *net,
BUG_ON(!net);
ret = __dev_alloc_name(net, name, buf);
if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
+ strscpy(dev->name, buf, IFNAMSIZ);
return ret;
}
@@ -1137,7 +1137,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev,
else if (netdev_name_in_use(net, name))
return -EEXIST;
else if (dev->name != name)
- strlcpy(dev->name, name, IFNAMSIZ);
+ strscpy(dev->name, name, IFNAMSIZ);
return 0;
}
@@ -10370,9 +10370,7 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
-#if IS_ENABLED(CONFIG_DECNET)
- WARN_ON(dev->dn_ptr);
-#endif
+
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b50bcc18b8d9..0f7078db1280 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4742,10 +4742,76 @@ void devlink_flash_update_timeout_notify(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+struct devlink_info_req {
+ struct sk_buff *msg;
+ void (*version_cb)(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv);
+ void *version_cb_priv;
+};
+
+struct devlink_flash_component_lookup_ctx {
+ const char *lookup_name;
+ bool lookup_name_found;
+};
+
+static void
+devlink_flash_component_lookup_cb(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv)
+{
+ struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
+
+ if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
+ lookup_ctx->lookup_name_found)
+ return;
+
+ lookup_ctx->lookup_name_found =
+ !strcmp(lookup_ctx->lookup_name, version_name);
+}
+
+static int devlink_flash_component_get(struct devlink *devlink,
+ struct nlattr *nla_component,
+ const char **p_component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_flash_component_lookup_ctx lookup_ctx = {};
+ struct devlink_info_req req = {};
+ const char *component;
+ int ret;
+
+ if (!nla_component)
+ return 0;
+
+ component = nla_data(nla_component);
+
+ if (!devlink->ops->info_get) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ lookup_ctx.lookup_name = component;
+ req.version_cb = devlink_flash_component_lookup_cb;
+ req.version_cb_priv = &lookup_ctx;
+
+ ret = devlink->ops->info_get(devlink, &req, NULL);
+ if (ret)
+ return ret;
+
+ if (!lookup_ctx.lookup_name_found) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "selected component is not supported by this device");
+ return -EINVAL;
+ }
+ *p_component = component;
+ return 0;
+}
+
static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
struct genl_info *info)
{
- struct nlattr *nla_component, *nla_overwrite_mask, *nla_file_name;
+ struct nlattr *nla_overwrite_mask, *nla_file_name;
struct devlink_flash_update_params params = {};
struct devlink *devlink = info->user_ptr[0];
const char *file_name;
@@ -4758,17 +4824,13 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
return -EINVAL;
- supported_params = devlink->ops->supported_flash_update_params;
+ ret = devlink_flash_component_get(devlink,
+ info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
+ &params.component, info->extack);
+ if (ret)
+ return ret;
- nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
- if (nla_component) {
- if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT)) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_component,
- "component update is not supported by this device");
- return -EOPNOTSUPP;
- }
- params.component = nla_data(nla_component);
- }
+ supported_params = devlink->ops->supported_flash_update_params;
nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
if (nla_overwrite_mask) {
@@ -6553,18 +6615,18 @@ out_unlock:
return err;
}
-struct devlink_info_req {
- struct sk_buff *msg;
-};
-
int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
{
+ if (!req->msg)
+ return 0;
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
}
EXPORT_SYMBOL_GPL(devlink_info_driver_name_put);
int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
{
+ if (!req->msg)
+ return 0;
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
}
EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
@@ -6572,6 +6634,8 @@ EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
int devlink_info_board_serial_number_put(struct devlink_info_req *req,
const char *bsn)
{
+ if (!req->msg)
+ return 0;
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
bsn);
}
@@ -6579,11 +6643,19 @@ EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
static int devlink_info_version_put(struct devlink_info_req *req, int attr,
const char *version_name,
- const char *version_value)
+ const char *version_value,
+ enum devlink_info_version_type version_type)
{
struct nlattr *nest;
int err;
+ if (req->version_cb)
+ req->version_cb(version_name, version_type,
+ req->version_cb_priv);
+
+ if (!req->msg)
+ return 0;
+
nest = nla_nest_start_noflag(req->msg, attr);
if (!nest)
return -EMSGSIZE;
@@ -6612,7 +6684,8 @@ int devlink_info_version_fixed_put(struct devlink_info_req *req,
const char *version_value)
{
return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
- version_name, version_value);
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
}
EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
@@ -6621,25 +6694,49 @@ int devlink_info_version_stored_put(struct devlink_info_req *req,
const char *version_value)
{
return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value);
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
}
EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
+
int devlink_info_version_running_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value)
{
return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value);
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
}
EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+
static int
devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags, struct netlink_ext_ack *extack)
{
- struct devlink_info_req req;
+ struct devlink_info_req req = {};
void *hdr;
int err;
@@ -12306,8 +12403,8 @@ EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
{
+ struct devlink_info_req req = {};
const struct nlattr *nlattr;
- struct devlink_info_req req;
struct sk_buff *msg;
int rem, err;
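
With the version_cb plumbing above, devlink validates DEVLINK_ATTR_FLASH_UPDATE_COMPONENT against the component names a driver reports via ->info_get() instead of the coarse DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT bit. Driver-side sketch (hypothetical driver; the version names are illustrative) of reporting a component that then becomes a valid flash target:

	#include <net/devlink.h>

	static int example_devlink_info_get(struct devlink *devlink,
					    struct devlink_info_req *req,
					    struct netlink_ext_ack *extack)
	{
		int err;

		err = devlink_info_driver_name_put(req, "example");
		if (err)
			return err;

		/* Reporting the version as a COMPONENT makes "fw.bundle" an
		 * acceptable DEVLINK_ATTR_FLASH_UPDATE_COMPONENT value. */
		return devlink_info_version_running_put_ext(req, "fw.bundle",
							    "1.2.3",
							    DEVLINK_INFO_VERSION_TYPE_COMPONENT);
	}
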
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 75501e1bdd25..876664fc605e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -464,7 +464,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
goto out;
hw_entry = &hw_entries->entries[hw_entries->num_entries];
- strlcpy(hw_entry->trap_name, metadata->trap_name,
+ strscpy(hw_entry->trap_name, metadata->trap_name,
NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
hw_entry->count = 1;
hw_entries->num_entries++;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 78cc8fb68814..e93edb810103 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1853,9 +1853,6 @@ static struct neigh_table *neigh_find_table(int family)
case AF_INET6:
tbl = neigh_tables[NEIGH_ND_TABLE];
break;
- case AF_DECnet:
- tbl = neigh_tables[NEIGH_DN_TABLE];
- break;
}
return tbl;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 5d27067b72d5..9be762e1d042 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -556,7 +556,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
if ((delim = strchr(cur, ',')) == NULL)
goto parse_failed;
*delim = 0;
- strlcpy(np->dev_name, cur, sizeof(np->dev_name));
+ strscpy(np->dev_name, cur, sizeof(np->dev_name));
cur = delim;
}
cur++;
@@ -610,7 +610,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
int err;
np->dev = ndev;
- strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+ strscpy(np->dev_name, ndev->name, IFNAMSIZ);
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 84bb5e188d0d..48ecfbf29174 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -777,9 +777,10 @@ EXPORT_SYMBOL(__kfree_skb);
* hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
* tracepoint.
*/
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
- if (!skb_unref(skb))
+ if (unlikely(!skb_unref(skb)))
return;
DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9a9fb9487d63..a660baedd9e7 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -41,7 +41,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
- stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT);
+ stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -52,7 +52,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
sizeof(struct sock *),
stab->map.numa_node);
if (!stab->sks) {
- kfree(stab);
+ bpf_map_area_free(stab);
return ERR_PTR(-ENOMEM);
}
@@ -361,7 +361,7 @@ static void sock_map_free(struct bpf_map *map)
synchronize_rcu();
bpf_map_area_free(stab->sks);
- kfree(stab);
+ bpf_map_area_free(stab);
}
static void sock_map_release_progs(struct bpf_map *map)
@@ -1085,7 +1085,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
if (attr->key_size > MAX_BPF_STACK)
return ERR_PTR(-E2BIG);
- htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
+ htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return &htab->map;
free_htab:
- kfree(htab);
+ bpf_map_area_free(htab);
return ERR_PTR(err);
}
@@ -1168,7 +1168,7 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
bpf_map_area_free(htab->buckets);
- kfree(htab);
+ bpf_map_area_free(htab);
}
static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index da6e3b20cd75..6a6e121dc00c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -45,10 +45,11 @@ static unsigned int dccp_v4_pernet_id __read_mostly;
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
__be16 orig_sport, orig_dport;
- __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
@@ -89,9 +90,29 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (inet_opt == NULL || !inet_opt->opt.srr)
daddr = fl4->daddr;
- if (inet->inet_saddr == 0)
+ if (inet->inet_saddr == 0) {
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ prev_addr_hashbucket =
+ inet_bhashfn_portaddr(&dccp_hashinfo, sk,
+ sock_net(sk),
+ inet->inet_num);
+ prev_sk_rcv_saddr = sk->sk_rcv_saddr;
+ }
inet->inet_saddr = fl4->saddr;
+ }
+
sk_rcv_saddr_set(sk, inet->inet_saddr);
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ inet->inet_saddr = 0;
+ sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
+ ip_rt_put(rt);
+ return err;
+ }
+ }
+
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fd44638ec16b..e57b43006074 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -934,8 +934,26 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (saddr == NULL) {
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ struct in6_addr prev_v6_rcv_saddr;
+
+ if (icsk->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo,
+ sk, sock_net(sk),
+ inet->inet_num);
+ prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ }
+
saddr = &fl6.saddr;
sk->sk_v6_rcv_saddr = *saddr;
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
+ goto failure;
+ }
+ }
}
/* set the source address */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e13641c65f88..7cd4a6cc99fc 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,6 +1120,12 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
+ dccp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("dccp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
+ if (!dccp_hashinfo.bind2_bucket_cachep)
+ goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1150,7 +1156,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind_bucket_cachep;
+ goto out_free_bind2_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1176,14 +1182,24 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
+ dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
+ __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
+
+ if (!dccp_hashinfo.bhash2) {
+ DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
+ goto out_free_dccp_bhash;
+ }
+
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
+ spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
+ INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash;
+ goto out_free_dccp_bhash2;
rc = dccp_ackvec_init();
if (rc)
@@ -1207,30 +1223,38 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
+out_free_dccp_bhash2:
+ free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+out_free_bind2_bucket_cachep:
+ kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
+ dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
+ dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
+ int bhash_order = get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket));
+
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash,
- get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket)));
+ free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
deleted file mode 100644
index 24336bdb1054..000000000000
--- a/net/decnet/Kconfig
+++ /dev/null
@@ -1,43 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# DECnet configuration
-#
-config DECNET
- tristate "DECnet Support"
- help
- The DECnet networking protocol was used in many products made by
- Digital (now Compaq). It provides reliable stream and sequenced
- packet communications over which run a variety of services similar
- to those which run over TCP/IP.
-
- To find some tools to use with the kernel layer support, please
- look at Patrick Caulfield's web site:
- <http://linux-decnet.sourceforge.net/>.
-
- More detailed documentation is available in
- <file:Documentation/networking/decnet.rst>.
-
- Be sure to say Y to "/proc file system support" and "Sysctl support"
- below when using DECnet, since you will need sysctl support to aid
- in configuration at run time.
-
- The DECnet code is also available as a module ( = code which can be
- inserted in and removed from the running kernel whenever you want).
- The module is called decnet.
-
-config DECNET_ROUTER
- bool "DECnet: router support"
- depends on DECNET
- select FIB_RULES
- help
- Add support for turning your DECnet Endnode into a level 1 or 2
- router. This is an experimental, but functional option. If you
- do say Y here, then make sure that you also say Y to "Kernel/User
- network link driver", "Routing messages" and "Network packet
- filtering". The first two are required to allow configuration via
- rtnetlink (you will need Alexey Kuznetsov's iproute2 package
- from <ftp://ftp.tux.org/pub/net/ip-routing/>). The "Network packet
- filtering" option will be required for the forthcoming routing daemon
- to work.
-
- See <file:Documentation/networking/decnet.rst> for more information.
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
deleted file mode 100644
index 07b38e441b2d..000000000000
--- a/net/decnet/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_DECNET) += decnet.o
-
-decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \
- dn_route.o dn_dev.o dn_neigh.o dn_timer.o
-decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o
-decnet-y += sysctl_net_decnet.o
-
-obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/decnet/README b/net/decnet/README
deleted file mode 100644
index 60e7ec88c81f..000000000000
--- a/net/decnet/README
+++ /dev/null
@@ -1,8 +0,0 @@
- Linux DECnet Project
- ======================
-
-The documentation for this kernel subsystem is available in the
-Documentation/networking subdirectory of this distribution and also
-on line at http://www.chygwyn.com/DECnet/
-
-Steve Whitehouse <SteveW@ACM.org>
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
deleted file mode 100644
index 6582dfdfb932..000000000000
--- a/net/decnet/af_decnet.c
+++ /dev/null
@@ -1,2404 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Socket Layer Interface
- *
- * Authors: Eduardo Marcelo Serrat <emserrat@geocities.com>
- * Patrick Caulfield <patrick@pandh.demon.co.uk>
- *
- * Changes:
- * Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's
- * version of the code. Original copyright preserved
- * below.
- * Steve Whitehouse: Some bug fixes, cleaning up some code to make it
- * compatible with my routing layer.
- * Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick
- * Caulfield.
- * Steve Whitehouse: Further bug fixes, checking module code still works
- * with new routing layer.
- * Steve Whitehouse: Additional set/get_sockopt() calls.
- * Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new
- * code.
- * Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like
- * way. Didn't manage it entirely, but its better.
- * Steve Whitehouse: ditto for sendmsg().
- * Steve Whitehouse: A selection of bug fixes to various things.
- * Steve Whitehouse: Added TIOCOUTQ ioctl.
- * Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username.
- * Steve Whitehouse: Fixes to connect() error returns.
- * Patrick Caulfield: Fixes to delayed acceptance logic.
- * David S. Miller: New socket locking
- * Steve Whitehouse: Socket list hashing/locking
- * Arnaldo C. Melo: use capable, not suser
- * Steve Whitehouse: Removed unused code. Fix to use sk->allocation
- * when required.
- * Patrick Caulfield: /proc/net/decnet now has object name/number
- * Steve Whitehouse: Fixed local port allocation, hashed sk list
- * Matthew Wilcox: Fixes for dn_ioctl()
- * Steve Whitehouse: New connect/accept logic to allow timeouts and
- * prepare for sendpage etc.
- */
-
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-
-HISTORY:
-
-Version Kernel Date Author/Comments
-------- ------ ---- ---------------
-Version 0.0.1 2.0.30 01-dic-97 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
-
- First Development of DECnet Socket La-
- yer for Linux. Only supports outgoing
- connections.
-
-Version 0.0.2 2.1.105 20-jun-98 Patrick J. Caulfield
- (patrick@pandh.demon.co.uk)
-
- Port to new kernel development version.
-
-Version 0.0.3 2.1.106 25-jun-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- _
- Added support for incoming connections
- so we can start developing server apps
- on Linux.
- -
- Module Support
-Version 0.0.4 2.1.109 21-jul-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- _
- Added support for X11R6.4. Now we can
- use DECnet transport for X on Linux!!!
- -
-Version 0.0.5 2.1.110 01-aug-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- Removed bugs on flow control
- Removed bugs on incoming accessdata
- order
- -
-Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
- dn_recvmsg fixes
-
- Patrick J. Caulfield
- dn_bind fixes
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/netfilter.h>
-#include <linux/seq_file.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/flow.h>
-#include <asm/ioctls.h>
-#include <linux/capability.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/jiffies.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/fib_rules.h>
-#include <net/tcp.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-
-struct dn_sock {
- struct sock sk;
- struct dn_scp scp;
-};
-
-static void dn_keepalive(struct sock *sk);
-
-#define DN_SK_HASH_SHIFT 8
-#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT)
-#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
-
-
-static const struct proto_ops dn_proto_ops;
-static DEFINE_RWLOCK(dn_hash_lock);
-static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
-static struct hlist_head dn_wild_sk;
-static atomic_long_t decnet_memory_allocated;
-static DEFINE_PER_CPU(int, decnet_memory_per_cpu_fw_alloc);
-
-static int __dn_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen, int flags);
-static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
-
-static struct hlist_head *dn_find_list(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->addr.sdn_flags & SDF_WILD)
- return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
-
- return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK];
-}
-
-/*
- * Valid ports are those greater than zero and not already in use.
- */
-static int check_port(__le16 port)
-{
- struct sock *sk;
-
- if (port == 0)
- return -1;
-
- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
- struct dn_scp *scp = DN_SK(sk);
- if (scp->addrloc == port)
- return -1;
- }
- return 0;
-}
-
-static unsigned short port_alloc(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- static unsigned short port = 0x2000;
- unsigned short i_port = port;
-
- while(check_port(cpu_to_le16(++port)) != 0) {
- if (port == i_port)
- return 0;
- }
-
- scp->addrloc = cpu_to_le16(port);
-
- return 1;
-}
-
-/*
- * Since this is only ever called from user
- * level, we don't need a write_lock() version
- * of this.
- */
-static int dn_hash_sock(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct hlist_head *list;
- int rv = -EUSERS;
-
- BUG_ON(sk_hashed(sk));
-
- write_lock_bh(&dn_hash_lock);
-
- if (!scp->addrloc && !port_alloc(sk))
- goto out;
-
- rv = -EADDRINUSE;
- if ((list = dn_find_list(sk)) == NULL)
- goto out;
-
- sk_add_node(sk, list);
- rv = 0;
-out:
- write_unlock_bh(&dn_hash_lock);
- return rv;
-}
-
-static void dn_unhash_sock(struct sock *sk)
-{
- write_lock(&dn_hash_lock);
- sk_del_node_init(sk);
- write_unlock(&dn_hash_lock);
-}
-
-static void dn_unhash_sock_bh(struct sock *sk)
-{
- write_lock_bh(&dn_hash_lock);
- sk_del_node_init(sk);
- write_unlock_bh(&dn_hash_lock);
-}
-
-static struct hlist_head *listen_hash(struct sockaddr_dn *addr)
-{
- int i;
- unsigned int hash = addr->sdn_objnum;
-
- if (hash == 0) {
- hash = addr->sdn_objnamel;
- for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) {
- hash ^= addr->sdn_objname[i];
- hash ^= (hash << 3);
- }
- }
-
- return &dn_sk_hash[hash & DN_SK_HASH_MASK];
-}
-
-/*
- * Called to transform a socket from bound (i.e. with a local address)
- * into a listening socket (doesn't need a local port number) and rehashes
- * based upon the object name/number.
- */
-static void dn_rehash_sock(struct sock *sk)
-{
- struct hlist_head *list;
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->addr.sdn_flags & SDF_WILD)
- return;
-
- write_lock_bh(&dn_hash_lock);
- sk_del_node_init(sk);
- DN_SK(sk)->addrloc = 0;
- list = listen_hash(&DN_SK(sk)->addr);
- sk_add_node(sk, list);
- write_unlock_bh(&dn_hash_lock);
-}
-
-int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type)
-{
- int len = 2;
-
- *buf++ = type;
-
- switch (type) {
- case 0:
- *buf++ = sdn->sdn_objnum;
- break;
- case 1:
- *buf++ = 0;
- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
- len = 3 + le16_to_cpu(sdn->sdn_objnamel);
- break;
- case 2:
- memset(buf, 0, 5);
- buf += 5;
- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
- len = 7 + le16_to_cpu(sdn->sdn_objnamel);
- break;
- }
-
- return len;
-}
-
-/*
- * On reception of usernames, we handle types 1 and 0 for destination
- * addresses only. Types 2 and 4 are used for source addresses, but the
- * UIC, GIC are ignored and they are both treated the same way. Type 3
- * is never used as I've no idea what its purpose might be or what its
- * format is.
- */
-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt)
-{
- unsigned char type;
- int size = len;
- int namel = 12;
-
- sdn->sdn_objnum = 0;
- sdn->sdn_objnamel = cpu_to_le16(0);
- memset(sdn->sdn_objname, 0, DN_MAXOBJL);
-
- if (len < 2)
- return -1;
-
- len -= 2;
- *fmt = *data++;
- type = *data++;
-
- switch (*fmt) {
- case 0:
- sdn->sdn_objnum = type;
- return 2;
- case 1:
- namel = 16;
- break;
- case 2:
- len -= 4;
- data += 4;
- break;
- case 4:
- len -= 8;
- data += 8;
- break;
- default:
- return -1;
- }
-
- len -= 1;
-
- if (len < 0)
- return -1;
-
- sdn->sdn_objnamel = cpu_to_le16(*data++);
- len -= le16_to_cpu(sdn->sdn_objnamel);
-
- if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel))
- return -1;
-
- memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel));
-
- return size - len;
-}
-
-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
-{
- struct hlist_head *list = listen_hash(addr);
- struct sock *sk;
-
- read_lock(&dn_hash_lock);
- sk_for_each(sk, list) {
- struct dn_scp *scp = DN_SK(sk);
- if (sk->sk_state != TCP_LISTEN)
- continue;
- if (scp->addr.sdn_objnum) {
- if (scp->addr.sdn_objnum != addr->sdn_objnum)
- continue;
- } else {
- if (addr->sdn_objnum)
- continue;
- if (scp->addr.sdn_objnamel != addr->sdn_objnamel)
- continue;
- if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0)
- continue;
- }
- sock_hold(sk);
- read_unlock(&dn_hash_lock);
- return sk;
- }
-
- sk = sk_head(&dn_wild_sk);
- if (sk) {
- if (sk->sk_state == TCP_LISTEN)
- sock_hold(sk);
- else
- sk = NULL;
- }
-
- read_unlock(&dn_hash_lock);
- return sk;
-}
-
-struct sock *dn_find_by_skb(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sock *sk;
- struct dn_scp *scp;
-
- read_lock(&dn_hash_lock);
- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
- scp = DN_SK(sk);
- if (cb->src != dn_saddr2dn(&scp->peer))
- continue;
- if (cb->dst_port != scp->addrloc)
- continue;
- if (scp->addrrem && (cb->src_port != scp->addrrem))
- continue;
- sock_hold(sk);
- goto found;
- }
- sk = NULL;
-found:
- read_unlock(&dn_hash_lock);
- return sk;
-}
-
-
-
-static void dn_destruct(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- skb_queue_purge(&scp->data_xmit_queue);
- skb_queue_purge(&scp->other_xmit_queue);
- skb_queue_purge(&scp->other_receive_queue);
-
- dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
-}
-
-static unsigned long dn_memory_pressure;
-
-static void dn_enter_memory_pressure(struct sock *sk)
-{
- if (!dn_memory_pressure) {
- dn_memory_pressure = 1;
- }
-}
-
-static struct proto dn_proto = {
- .name = "NSP",
- .owner = THIS_MODULE,
- .enter_memory_pressure = dn_enter_memory_pressure,
- .memory_pressure = &dn_memory_pressure,
-
- .memory_allocated = &decnet_memory_allocated,
- .per_cpu_fw_alloc = &decnet_memory_per_cpu_fw_alloc,
-
- .sysctl_mem = sysctl_decnet_mem,
- .sysctl_wmem = sysctl_decnet_wmem,
- .sysctl_rmem = sysctl_decnet_rmem,
- .max_header = DN_MAX_NSP_DATA_HEADER + 64,
- .obj_size = sizeof(struct dn_sock),
-};
-
-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
-{
- struct dn_scp *scp;
- struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
-
- if (!sk)
- goto out;
-
- if (sock)
- sock->ops = &dn_proto_ops;
- sock_init_data(sock, sk);
-
- sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
- sk->sk_destruct = dn_destruct;
- sk->sk_no_check_tx = 1;
- sk->sk_family = PF_DECnet;
- sk->sk_protocol = 0;
- sk->sk_allocation = gfp;
- sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
- sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
-
- /* Initialization of DECnet Session Control Port */
- scp = DN_SK(sk);
- scp->state = DN_O; /* Open */
- scp->numdat = 1; /* Next data seg to tx */
- scp->numoth = 1; /* Next oth data to tx */
- scp->ackxmt_dat = 0; /* Last data seg ack'ed */
- scp->ackxmt_oth = 0; /* Last oth data ack'ed */
- scp->ackrcv_dat = 0; /* Highest data ack recv*/
- scp->ackrcv_oth = 0; /* Last oth data ack rec*/
- scp->flowrem_sw = DN_SEND;
- scp->flowloc_sw = DN_SEND;
- scp->flowrem_dat = 0;
- scp->flowrem_oth = 1;
- scp->flowloc_dat = 0;
- scp->flowloc_oth = 1;
- scp->services_rem = 0;
- scp->services_loc = 1 | NSP_FC_NONE;
- scp->info_rem = 0;
- scp->info_loc = 0x03; /* NSP version 4.1 */
- scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */
- scp->nonagle = 0;
- scp->multi_ireq = 1;
- scp->accept_mode = ACC_IMMED;
- scp->addr.sdn_family = AF_DECnet;
- scp->peer.sdn_family = AF_DECnet;
- scp->accessdata.acc_accl = 5;
- memcpy(scp->accessdata.acc_acc, "LINUX", 5);
-
- scp->max_window = NSP_MAX_WINDOW;
- scp->snd_window = NSP_MIN_WINDOW;
- scp->nsp_srtt = NSP_INITIAL_SRTT;
- scp->nsp_rttvar = NSP_INITIAL_RTTVAR;
- scp->nsp_rxtshift = 0;
-
- skb_queue_head_init(&scp->data_xmit_queue);
- skb_queue_head_init(&scp->other_xmit_queue);
- skb_queue_head_init(&scp->other_receive_queue);
-
- scp->persist = 0;
- scp->persist_fxn = NULL;
- scp->keepalive = 10 * HZ;
- scp->keepalive_fxn = dn_keepalive;
-
- dn_start_slow_timer(sk);
-out:
- return sk;
-}
-
-/*
- * Keepalive timer.
- * FIXME: Should respond to SO_KEEPALIVE etc.
- */
-static void dn_keepalive(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- /*
- * By checking the other_data transmit queue is empty
- * we are double checking that we are not sending too
- * many of these keepalive frames.
- */
- if (skb_queue_empty(&scp->other_xmit_queue))
- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
-}
-
-
-/*
- * Timer for shutdown/destroyed sockets.
- * When socket is dead & no packets have been sent for a
- * certain amount of time, they are removed by this
- * routine. Also takes care of sending out DI & DC
- * frames at correct times.
- */
-int dn_destroy_timer(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- scp->persist = dn_nsp_persist(sk);
-
- switch (scp->state) {
- case DN_DI:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
- if (scp->nsp_rxtshift >= decnet_di_count)
- scp->state = DN_CN;
- return 0;
-
- case DN_DR:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
- if (scp->nsp_rxtshift >= decnet_dr_count)
- scp->state = DN_DRC;
- return 0;
-
- case DN_DN:
- if (scp->nsp_rxtshift < decnet_dn_count) {
- /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
- GFP_ATOMIC);
- return 0;
- }
- }
-
- scp->persist = (HZ * decnet_time_wait);
-
- if (sk->sk_socket)
- return 0;
-
- if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) {
- dn_unhash_sock(sk);
- sock_put(sk);
- return 1;
- }
-
- return 0;
-}
-
-static void dn_destroy_sock(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- scp->nsp_rxtshift = 0; /* reset back off */
-
- if (sk->sk_socket) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- }
-
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_DN:
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
- sk->sk_allocation);
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
- break;
- case DN_CR:
- scp->state = DN_DR;
- goto disc_reject;
- case DN_RUN:
- scp->state = DN_DI;
- fallthrough;
- case DN_DI:
- case DN_DR:
-disc_reject:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
- fallthrough;
- case DN_NC:
- case DN_NR:
- case DN_RJ:
- case DN_DIC:
- case DN_CN:
- case DN_DRC:
- case DN_CI:
- case DN_CD:
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
- break;
- default:
- printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
- fallthrough;
- case DN_O:
- dn_stop_slow_timer(sk);
-
- dn_unhash_sock_bh(sk);
- sock_put(sk);
-
- break;
- }
-}
-
-char *dn_addr2asc(__u16 addr, char *buf)
-{
- unsigned short node, area;
-
- node = addr & 0x03ff;
- area = addr >> 10;
- sprintf(buf, "%hd.%hd", area, node);
-
- return buf;
-}
-
-
-
-static int dn_create(struct net *net, struct socket *sock, int protocol,
- int kern)
-{
- struct sock *sk;
-
- if (protocol < 0 || protocol > U8_MAX)
- return -EINVAL;
-
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
- switch (sock->type) {
- case SOCK_SEQPACKET:
- if (protocol != DNPROTO_NSP)
- return -EPROTONOSUPPORT;
- break;
- case SOCK_STREAM:
- break;
- default:
- return -ESOCKTNOSUPPORT;
- }
-
-
- if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
- return -ENOBUFS;
-
- sk->sk_protocol = protocol;
-
- return 0;
-}
-
-
-static int
-dn_release(struct socket *sock)
-{
- struct sock *sk = sock->sk;
-
- if (sk) {
- sock_orphan(sk);
- sock_hold(sk);
- lock_sock(sk);
- dn_destroy_sock(sk);
- release_sock(sk);
- sock_put(sk);
- }
-
- return 0;
-}
-
-static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
- struct net_device *dev, *ldev;
- int rv;
-
- if (addr_len != sizeof(struct sockaddr_dn))
- return -EINVAL;
-
- if (saddr->sdn_family != AF_DECnet)
- return -EINVAL;
-
- if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2))
- return -EINVAL;
-
- if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL)
- return -EINVAL;
-
- if (saddr->sdn_flags & ~SDF_WILD)
- return -EINVAL;
-
- if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum ||
- (saddr->sdn_flags & SDF_WILD)))
- return -EACCES;
-
- if (!(saddr->sdn_flags & SDF_WILD)) {
- if (le16_to_cpu(saddr->sdn_nodeaddrl)) {
- rcu_read_lock();
- ldev = NULL;
- for_each_netdev_rcu(&init_net, dev) {
- if (!dev->dn_ptr)
- continue;
- if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
- ldev = dev;
- break;
- }
- }
- rcu_read_unlock();
- if (ldev == NULL)
- return -EADDRNOTAVAIL;
- }
- }
-
- rv = -EINVAL;
- lock_sock(sk);
- if (sock_flag(sk, SOCK_ZAPPED)) {
- memcpy(&scp->addr, saddr, addr_len);
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- rv = dn_hash_sock(sk);
- if (rv)
- sock_set_flag(sk, SOCK_ZAPPED);
- }
- release_sock(sk);
-
- return rv;
-}
-
-
-static int dn_auto_bind(struct socket *sock)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int rv;
-
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- scp->addr.sdn_flags = 0;
- scp->addr.sdn_objnum = 0;
-
- /*
- * This stuff is to keep compatibility with Eduardo's
- * patch. I hope I can dispense with it shortly...
- */
- if ((scp->accessdata.acc_accl != 0) &&
- (scp->accessdata.acc_accl <= 12)) {
-
- scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl);
- memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel));
-
- scp->accessdata.acc_accl = 0;
- memset(scp->accessdata.acc_acc, 0, 40);
- }
- /* End of compatibility stuff */
-
- scp->addr.sdn_add.a_len = cpu_to_le16(2);
- rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr);
- if (rv == 0) {
- rv = dn_hash_sock(sk);
- if (rv)
- sock_set_flag(sk, SOCK_ZAPPED);
- }
-
- return rv;
-}
-
-static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
-{
- struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int err;
-
- if (scp->state != DN_CR)
- return -EINVAL;
-
- scp->state = DN_CC;
- scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
- dn_send_conn_conf(sk, allocation);
-
- add_wait_queue(sk_sleep(sk), &wait);
- for(;;) {
- release_sock(sk);
- if (scp->state == DN_CC)
- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
- lock_sock(sk);
- err = 0;
- if (scp->state == DN_RUN)
- break;
- err = sock_error(sk);
- if (err)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -EAGAIN;
- if (!*timeo)
- break;
- }
- remove_wait_queue(sk_sleep(sk), &wait);
- if (err == 0) {
- sk->sk_socket->state = SS_CONNECTED;
- } else if (scp->state != DN_CC) {
- sk->sk_socket->state = SS_UNCONNECTED;
- }
- return err;
-}
-
-static int dn_wait_run(struct sock *sk, long *timeo)
-{
- struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int err = 0;
-
- if (scp->state == DN_RUN)
- goto out;
-
- if (!*timeo)
- return -EALREADY;
-
- add_wait_queue(sk_sleep(sk), &wait);
- for(;;) {
- release_sock(sk);
- if (scp->state == DN_CI || scp->state == DN_CC)
- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
- lock_sock(sk);
- err = 0;
- if (scp->state == DN_RUN)
- break;
- err = sock_error(sk);
- if (err)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -ETIMEDOUT;
- if (!*timeo)
- break;
- }
- remove_wait_queue(sk_sleep(sk), &wait);
-out:
- if (err == 0) {
- sk->sk_socket->state = SS_CONNECTED;
- } else if (scp->state != DN_CI && scp->state != DN_CC) {
- sk->sk_socket->state = SS_UNCONNECTED;
- }
- return err;
-}
-
-static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
-{
- struct socket *sock = sk->sk_socket;
- struct dn_scp *scp = DN_SK(sk);
- int err = -EISCONN;
- struct flowidn fld;
- struct dst_entry *dst;
-
- if (sock->state == SS_CONNECTED)
- goto out;
-
- if (sock->state == SS_CONNECTING) {
- err = 0;
- if (scp->state == DN_RUN) {
- sock->state = SS_CONNECTED;
- goto out;
- }
- err = -ECONNREFUSED;
- if (scp->state != DN_CI && scp->state != DN_CC) {
- sock->state = SS_UNCONNECTED;
- goto out;
- }
- return dn_wait_run(sk, timeo);
- }
-
- err = -EINVAL;
- if (scp->state != DN_O)
- goto out;
-
- if (addr == NULL || addrlen != sizeof(struct sockaddr_dn))
- goto out;
- if (addr->sdn_family != AF_DECnet)
- goto out;
- if (addr->sdn_flags & SDF_WILD)
- goto out;
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- err = dn_auto_bind(sk->sk_socket);
- if (err)
- goto out;
- }
-
- memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
-
- err = -EHOSTUNREACH;
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_oif = sk->sk_bound_dev_if;
- fld.daddr = dn_saddr2dn(&scp->peer);
- fld.saddr = dn_saddr2dn(&scp->addr);
- dn_sk_ports_copy(&fld, scp);
- fld.flowidn_proto = DNPROTO_NSP;
- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
- goto out;
- dst = __sk_dst_get(sk);
- sk->sk_route_caps = dst->dev->features;
- sock->state = SS_CONNECTING;
- scp->state = DN_CI;
- scp->segsize_loc = dst_metric_advmss(dst);
-
- dn_nsp_send_conninit(sk, NSP_CI);
- err = -EINPROGRESS;
- if (*timeo) {
- err = dn_wait_run(sk, timeo);
- }
-out:
- return err;
-}
-
-static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags)
-{
- struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr;
- struct sock *sk = sock->sk;
- int err;
- long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
- lock_sock(sk);
- err = __dn_connect(sk, addr, addrlen, &timeo, 0);
- release_sock(sk);
-
- return err;
-}
-
-static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- switch (scp->state) {
- case DN_RUN:
- return 0;
- case DN_CR:
- return dn_confirm_accept(sk, timeo, sk->sk_allocation);
- case DN_CI:
- case DN_CC:
- return dn_wait_run(sk, timeo);
- case DN_O:
- return __dn_connect(sk, addr, addrlen, timeo, flags);
- }
-
- return -EINVAL;
-}
-
-
-static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc)
-{
- unsigned char *ptr = skb->data;
-
- acc->acc_userl = *ptr++;
- memcpy(&acc->acc_user, ptr, acc->acc_userl);
- ptr += acc->acc_userl;
-
- acc->acc_passl = *ptr++;
- memcpy(&acc->acc_pass, ptr, acc->acc_passl);
- ptr += acc->acc_passl;
-
- acc->acc_accl = *ptr++;
- memcpy(&acc->acc_acc, ptr, acc->acc_accl);
-
- skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3);
-
-}
-
-static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
-{
- unsigned char *ptr = skb->data;
- u16 len = *ptr++; /* yes, it's 8bit on the wire */
-
- BUG_ON(len > 16); /* we've checked the contents earlier */
- opt->opt_optl = cpu_to_le16(len);
- opt->opt_status = 0;
- memcpy(opt->opt_data, ptr, len);
- skb_pull(skb, len + 1);
-}
-
-static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct sk_buff *skb = NULL;
- int err = 0;
-
- add_wait_queue(sk_sleep(sk), &wait);
- for(;;) {
- release_sock(sk);
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
- skb = skb_dequeue(&sk->sk_receive_queue);
- }
- lock_sock(sk);
- if (skb != NULL)
- break;
- err = -EINVAL;
- if (sk->sk_state != TCP_LISTEN)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -EAGAIN;
- if (!*timeo)
- break;
- }
- remove_wait_queue(sk_sleep(sk), &wait);
-
- return skb == NULL ? ERR_PTR(err) : skb;
-}
-
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
-{
- struct sock *sk = sock->sk, *newsk;
- struct sk_buff *skb = NULL;
- struct dn_skb_cb *cb;
- unsigned char menuver;
- int err = 0;
- unsigned char type;
- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
- struct dst_entry *dst;
-
- lock_sock(sk);
-
- if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) {
- release_sock(sk);
- return -EINVAL;
- }
-
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- skb = dn_wait_for_connect(sk, &timeo);
- if (IS_ERR(skb)) {
- release_sock(sk);
- return PTR_ERR(skb);
- }
- }
-
- cb = DN_SKB_CB(skb);
- sk_acceptq_removed(sk);
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
- if (newsk == NULL) {
- release_sock(sk);
- kfree_skb(skb);
- return -ENOBUFS;
- }
- release_sock(sk);
-
- dst = skb_dst(skb);
- sk_dst_set(newsk, dst);
- skb_dst_set(skb, NULL);
-
- DN_SK(newsk)->state = DN_CR;
- DN_SK(newsk)->addrrem = cb->src_port;
- DN_SK(newsk)->services_rem = cb->services;
- DN_SK(newsk)->info_rem = cb->info;
- DN_SK(newsk)->segsize_rem = cb->segsize;
- DN_SK(newsk)->accept_mode = DN_SK(sk)->accept_mode;
-
- if (DN_SK(newsk)->segsize_rem < 230)
- DN_SK(newsk)->segsize_rem = 230;
-
- if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
- DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd;
-
- newsk->sk_state = TCP_LISTEN;
- memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn));
-
- /*
- * If we are listening on a wild socket, we don't want
- * the newly created socket on the wrong hash queue.
- */
- DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD;
-
- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type));
- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type));
- *(__le16 *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src;
- *(__le16 *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst;
-
- menuver = *skb->data;
- skb_pull(skb, 1);
-
- if (menuver & DN_MENUVER_ACC)
- dn_access_copy(skb, &(DN_SK(newsk)->accessdata));
-
- if (menuver & DN_MENUVER_USR)
- dn_user_copy(skb, &(DN_SK(newsk)->conndata_in));
-
- if (menuver & DN_MENUVER_PRX)
- DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY;
-
- if (menuver & DN_MENUVER_UIC)
- DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY;
-
- kfree_skb(skb);
-
- memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out),
- sizeof(struct optdata_dn));
- memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out),
- sizeof(struct optdata_dn));
-
- lock_sock(newsk);
- err = dn_hash_sock(newsk);
- if (err == 0) {
- sock_reset_flag(newsk, SOCK_ZAPPED);
- dn_send_conn_ack(newsk);
-
- /*
- * Here we use sk->sk_allocation since although the conn conf is
- * for the newsk, the context is the old socket.
- */
- if (DN_SK(newsk)->accept_mode == ACC_IMMED)
- err = dn_confirm_accept(newsk, &timeo,
- sk->sk_allocation);
- }
- release_sock(newsk);
- return err;
-}
-
-
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
-{
- struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
-
- lock_sock(sk);
-
- if (peer) {
- if ((sock->state != SS_CONNECTED &&
- sock->state != SS_CONNECTING) &&
- scp->accept_mode == ACC_IMMED) {
- release_sock(sk);
- return -ENOTCONN;
- }
-
- memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn));
- } else {
- memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn));
- }
-
- release_sock(sk);
-
- return sizeof(struct sockaddr_dn);
-}
-
-
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
-
- if (!skb_queue_empty_lockless(&scp->other_receive_queue))
- mask |= EPOLLRDBAND;
-
- return mask;
-}
-
-static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int err = -EOPNOTSUPP;
- long amount = 0;
- struct sk_buff *skb;
- int val;
-
- switch(cmd)
- {
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- return dn_dev_ioctl(cmd, (void __user *)arg);
-
- case SIOCATMARK:
- lock_sock(sk);
- val = !skb_queue_empty(&scp->other_receive_queue);
- if (scp->state != DN_RUN)
- val = -ENOTCONN;
- release_sock(sk);
- return val;
-
- case TIOCOUTQ:
- amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- err = put_user(amount, (int __user *)arg);
- break;
-
- case TIOCINQ:
- lock_sock(sk);
- skb = skb_peek(&scp->other_receive_queue);
- if (skb) {
- amount = skb->len;
- } else {
- skb_queue_walk(&sk->sk_receive_queue, skb)
- amount += skb->len;
- }
- release_sock(sk);
- err = put_user(amount, (int __user *)arg);
- break;
-
- default:
- err = -ENOIOCTLCMD;
- break;
- }
-
- return err;
-}
-
-static int dn_listen(struct socket *sock, int backlog)
-{
- struct sock *sk = sock->sk;
- int err = -EINVAL;
-
- lock_sock(sk);
-
- if (sock_flag(sk, SOCK_ZAPPED))
- goto out;
-
- if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN))
- goto out;
-
- sk->sk_max_ack_backlog = backlog;
- sk->sk_ack_backlog = 0;
- sk->sk_state = TCP_LISTEN;
- err = 0;
- dn_rehash_sock(sk);
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-
-static int dn_shutdown(struct socket *sock, int how)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int err = -ENOTCONN;
-
- lock_sock(sk);
-
- if (sock->state == SS_UNCONNECTED)
- goto out;
-
- err = 0;
- if (sock->state == SS_DISCONNECTING)
- goto out;
-
- err = -EINVAL;
- if (scp->state == DN_O)
- goto out;
-
- if (how != SHUT_RDWR)
- goto out;
-
- sk->sk_shutdown = SHUTDOWN_MASK;
- dn_destroy_sock(sk);
- err = 0;
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-static int dn_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
- int err;
-
- lock_sock(sk);
- err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
- release_sock(sk);
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
- optname != DSO_STREAM && optname != DSO_SEQPACKET)
- err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
-
- return err;
-}
-
-static int __dn_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen, int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- long timeo;
- union {
- struct optdata_dn opt;
- struct accessdata_dn acc;
- int mode;
- unsigned long win;
- int val;
- unsigned char services;
- unsigned char info;
- } u;
- int err;
-
- if (optlen && sockptr_is_null(optval))
- return -EINVAL;
-
- if (optlen > sizeof(u))
- return -EINVAL;
-
- if (copy_from_sockptr(&u, optval, optlen))
- return -EFAULT;
-
- switch (optname) {
- case DSO_CONDATA:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if ((scp->state != DN_O) && (scp->state != DN_CR))
- return -EINVAL;
-
- if (optlen != sizeof(struct optdata_dn))
- return -EINVAL;
-
- if (le16_to_cpu(u.opt.opt_optl) > 16)
- return -EINVAL;
-
- memcpy(&scp->conndata_out, &u.opt, optlen);
- break;
-
- case DSO_DISDATA:
- if (sock->state != SS_CONNECTED &&
- scp->accept_mode == ACC_IMMED)
- return -ENOTCONN;
-
- if (optlen != sizeof(struct optdata_dn))
- return -EINVAL;
-
- if (le16_to_cpu(u.opt.opt_optl) > 16)
- return -EINVAL;
-
- memcpy(&scp->discdata_out, &u.opt, optlen);
- break;
-
- case DSO_CONACCESS:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if (scp->state != DN_O)
- return -EINVAL;
-
- if (optlen != sizeof(struct accessdata_dn))
- return -EINVAL;
-
- if ((u.acc.acc_accl > DN_MAXACCL) ||
- (u.acc.acc_passl > DN_MAXACCL) ||
- (u.acc.acc_userl > DN_MAXACCL))
- return -EINVAL;
-
- memcpy(&scp->accessdata, &u.acc, optlen);
- break;
-
- case DSO_ACCEPTMODE:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if (scp->state != DN_O)
- return -EINVAL;
-
- if (optlen != sizeof(int))
- return -EINVAL;
-
- if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER))
- return -EINVAL;
-
- scp->accept_mode = (unsigned char)u.mode;
- break;
-
- case DSO_CONACCEPT:
- if (scp->state != DN_CR)
- return -EINVAL;
- timeo = sock_rcvtimeo(sk, 0);
- err = dn_confirm_accept(sk, &timeo, sk->sk_allocation);
- return err;
-
- case DSO_CONREJECT:
- if (scp->state != DN_CR)
- return -EINVAL;
-
- scp->state = DN_DR;
- sk->sk_shutdown = SHUTDOWN_MASK;
- dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
- break;
-
- case DSO_MAXWINDOW:
- if (optlen != sizeof(unsigned long))
- return -EINVAL;
- if (u.win > NSP_MAX_WINDOW)
- u.win = NSP_MAX_WINDOW;
- if (u.win == 0)
- return -EINVAL;
- scp->max_window = u.win;
- if (scp->snd_window > u.win)
- scp->snd_window = u.win;
- break;
-
- case DSO_NODELAY:
- if (optlen != sizeof(int))
- return -EINVAL;
- if (scp->nonagle == TCP_NAGLE_CORK)
- return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
- /* if (scp->nonagle == 1) { Push pending frames } */
- break;
-
- case DSO_CORK:
- if (optlen != sizeof(int))
- return -EINVAL;
- if (scp->nonagle == TCP_NAGLE_OFF)
- return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
- /* if (scp->nonagle == 0) { Push pending frames } */
- break;
-
- case DSO_SERVICES:
- if (optlen != sizeof(unsigned char))
- return -EINVAL;
- if ((u.services & ~NSP_FC_MASK) != 0x01)
- return -EINVAL;
- if ((u.services & NSP_FC_MASK) == NSP_FC_MASK)
- return -EINVAL;
- scp->services_loc = u.services;
- break;
-
- case DSO_INFO:
- if (optlen != sizeof(unsigned char))
- return -EINVAL;
- if (u.info & 0xfc)
- return -EINVAL;
- scp->info_loc = u.info;
- break;
-
- case DSO_LINKINFO:
- case DSO_STREAM:
- case DSO_SEQPACKET:
- default:
- return -ENOPROTOOPT;
- }
-
- return 0;
-}
-
-static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
- int err;
-
- lock_sock(sk);
- err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
- release_sock(sk);
-#ifdef CONFIG_NETFILTER
- if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
- optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
- optname != DSO_CONREJECT) {
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
- if (err >= 0)
- err = put_user(len, optlen);
- }
-#endif
-
- return err;
-}
-
-static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct linkinfo_dn link;
- unsigned int r_len;
- void *r_data = NULL;
- unsigned int val;
-
- if(get_user(r_len , optlen))
- return -EFAULT;
-
- switch (optname) {
- case DSO_CONDATA:
- if (r_len > sizeof(struct optdata_dn))
- r_len = sizeof(struct optdata_dn);
- r_data = &scp->conndata_in;
- break;
-
- case DSO_DISDATA:
- if (r_len > sizeof(struct optdata_dn))
- r_len = sizeof(struct optdata_dn);
- r_data = &scp->discdata_in;
- break;
-
- case DSO_CONACCESS:
- if (r_len > sizeof(struct accessdata_dn))
- r_len = sizeof(struct accessdata_dn);
- r_data = &scp->accessdata;
- break;
-
- case DSO_ACCEPTMODE:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->accept_mode;
- break;
-
- case DSO_LINKINFO:
- if (r_len > sizeof(struct linkinfo_dn))
- r_len = sizeof(struct linkinfo_dn);
-
- memset(&link, 0, sizeof(link));
-
- switch (sock->state) {
- case SS_CONNECTING:
- link.idn_linkstate = LL_CONNECTING;
- break;
- case SS_DISCONNECTING:
- link.idn_linkstate = LL_DISCONNECTING;
- break;
- case SS_CONNECTED:
- link.idn_linkstate = LL_RUNNING;
- break;
- default:
- link.idn_linkstate = LL_INACTIVE;
- }
-
- link.idn_segsize = scp->segsize_rem;
- r_data = &link;
- break;
-
- case DSO_MAXWINDOW:
- if (r_len > sizeof(unsigned long))
- r_len = sizeof(unsigned long);
- r_data = &scp->max_window;
- break;
-
- case DSO_NODELAY:
- if (r_len > sizeof(int))
- r_len = sizeof(int);
- val = (scp->nonagle == TCP_NAGLE_OFF);
- r_data = &val;
- break;
-
- case DSO_CORK:
- if (r_len > sizeof(int))
- r_len = sizeof(int);
- val = (scp->nonagle == TCP_NAGLE_CORK);
- r_data = &val;
- break;
-
- case DSO_SERVICES:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->services_rem;
- break;
-
- case DSO_INFO:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->info_rem;
- break;
-
- case DSO_STREAM:
- case DSO_SEQPACKET:
- case DSO_CONACCEPT:
- case DSO_CONREJECT:
- default:
- return -ENOPROTOOPT;
- }
-
- if (r_data) {
- if (copy_to_user(optval, r_data, r_len))
- return -EFAULT;
- if (put_user(r_len, optlen))
- return -EFAULT;
- }
-
- return 0;
-}
-
-
-static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target)
-{
- struct sk_buff *skb;
- int len = 0;
-
- if (flags & MSG_OOB)
- return !skb_queue_empty(q) ? 1 : 0;
-
- skb_queue_walk(q, skb) {
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- len += skb->len;
-
- if (cb->nsp_flags & 0x40) {
- /* SOCK_SEQPACKET reads to EOM */
- if (sk->sk_type == SOCK_SEQPACKET)
- return 1;
- /* so does SOCK_STREAM unless WAITALL is specified */
- if (!(flags & MSG_WAITALL))
- return 1;
- }
-
- /* minimum data length for read exceeded */
- if (len >= target)
- return 1;
- }
-
- return 0;
-}
-
-
-static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
- int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff_head *queue = &sk->sk_receive_queue;
- size_t target = size > 1 ? 1 : 0;
- size_t copied = 0;
- int rv = 0;
- struct sk_buff *skb, *n;
- struct dn_skb_cb *cb = NULL;
- unsigned char eor = 0;
- long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- rv = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (sk->sk_shutdown & RCV_SHUTDOWN) {
- rv = 0;
- goto out;
- }
-
- rv = dn_check_state(sk, NULL, 0, &timeo, flags);
- if (rv)
- goto out;
-
- if (flags & ~(MSG_CMSG_COMPAT|MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
- rv = -EOPNOTSUPP;
- goto out;
- }
-
- if (flags & MSG_OOB)
- queue = &scp->other_receive_queue;
-
- if (flags & MSG_WAITALL)
- target = size;
-
-
- /*
- * See if there is data ready to read, sleep if there isn't
- */
- for(;;) {
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- if (sk->sk_err)
- goto out;
-
- if (!skb_queue_empty(&scp->other_receive_queue)) {
- if (!(flags & MSG_OOB)) {
- msg->msg_flags |= MSG_OOB;
- if (!scp->other_report) {
- scp->other_report = 1;
- goto out;
- }
- }
- }
-
- if (scp->state != DN_RUN)
- goto out;
-
- if (signal_pending(current)) {
- rv = sock_intr_errno(timeo);
- goto out;
- }
-
- if (dn_data_ready(sk, queue, flags, target))
- break;
-
- if (flags & MSG_DONTWAIT) {
- rv = -EWOULDBLOCK;
- goto out;
- }
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- }
-
- skb_queue_walk_safe(queue, skb, n) {
- unsigned int chunk = skb->len;
- cb = DN_SKB_CB(skb);
-
- if ((chunk + copied) > size)
- chunk = size - copied;
-
- if (memcpy_to_msg(msg, skb->data, chunk)) {
- rv = -EFAULT;
- break;
- }
- copied += chunk;
-
- if (!(flags & MSG_PEEK))
- skb_pull(skb, chunk);
-
- eor = cb->nsp_flags & 0x40;
-
- if (skb->len == 0) {
- skb_unlink(skb, queue);
- kfree_skb(skb);
- /*
- * N.B. Don't refer to skb or cb after this point
- * in loop.
- */
- if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) {
- scp->flowloc_sw = DN_SEND;
- dn_nsp_send_link(sk, DN_SEND, 0);
- }
- }
-
- if (eor) {
- if (sk->sk_type == SOCK_SEQPACKET)
- break;
- if (!(flags & MSG_WAITALL))
- break;
- }
-
- if (flags & MSG_OOB)
- break;
-
- if (copied >= target)
- break;
- }
-
- rv = copied;
-
-
- if (eor && (sk->sk_type == SOCK_SEQPACKET))
- msg->msg_flags |= MSG_EOR;
-
-out:
- if (rv == 0)
- rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk);
-
- if ((rv >= 0) && msg->msg_name) {
- __sockaddr_check_size(sizeof(struct sockaddr_dn));
- memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn));
- msg->msg_namelen = sizeof(struct sockaddr_dn);
- }
-
- release_sock(sk);
-
- return rv;
-}
-
-
-static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags)
-{
- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
- if (skb_queue_len(queue) >= scp->snd_window)
- return 1;
- if (fctype != NSP_FC_NONE) {
- if (flags & MSG_OOB) {
- if (scp->flowrem_oth == 0)
- return 1;
- } else {
- if (scp->flowrem_dat == 0)
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * The DECnet spec requires that the "routing layer" accepts packets which
- * are at least 230 bytes in size. This excludes any headers which the NSP
- * layer might add, so we always assume that we'll be using the maximal
- * length header on data packets. The variation in length is due to the
- * inclusion (or not) of the two 16 bit acknowledgement fields so it doesn't
- * make much practical difference.
- */
-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu)
-{
- unsigned int mss = 230 - DN_MAX_NSP_DATA_HEADER;
- if (dev) {
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- mtu -= LL_RESERVED_SPACE(dev);
- if (dn_db->use_long)
- mtu -= 21;
- else
- mtu -= 6;
- mtu -= DN_MAX_NSP_DATA_HEADER;
- } else {
- /*
- * 21 = long header, 16 = guess at MAC header length
- */
- mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16);
- }
- if (mtu > mss)
- mss = mtu;
- return mss;
-}
-
-static inline unsigned int dn_current_mss(struct sock *sk, int flags)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- struct dn_scp *scp = DN_SK(sk);
- int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem);
-
- /* Other data messages are limited to 16 bytes per packet */
- if (flags & MSG_OOB)
- return 16;
-
- /* This works out the maximum size of segment we can send out */
- if (dst) {
- u32 mtu = dst_mtu(dst);
- mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now);
- }
-
- return mss_now;
-}
-
-/*
- * N.B. We get the timeout wrong here, but then we always did get it
- * wrong before and this is another step along the road to correcting
- * it. It ought to get updated each time we pass through the routine,
- * but in practise it probably doesn't matter too much for now.
- */
-static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
- unsigned long datalen, int noblock,
- int *errcode)
-{
- struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
- noblock, errcode);
- if (skb) {
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->pkt_type = PACKET_OUTGOING;
- }
- return skb;
-}
-
-static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- size_t mss;
- struct sk_buff_head *queue = &scp->data_xmit_queue;
- int flags = msg->msg_flags;
- int err = 0;
- size_t sent = 0;
- int addr_len = msg->msg_namelen;
- DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name);
- struct sk_buff *skb = NULL;
- struct dn_skb_cb *cb;
- size_t len;
- unsigned char fctype;
- long timeo;
-
- if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
- return -EOPNOTSUPP;
-
- if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
- return -EINVAL;
-
- lock_sock(sk);
- timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /*
- * The only difference between stream sockets and sequenced packet
- * sockets is that the stream sockets always behave as if MSG_EOR
- * has been set.
- */
- if (sock->type == SOCK_STREAM) {
- if (flags & MSG_EOR) {
- err = -EINVAL;
- goto out;
- }
- flags |= MSG_EOR;
- }
-
-
- err = dn_check_state(sk, addr, addr_len, &timeo, flags);
- if (err)
- goto out_err;
-
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- err = -EPIPE;
- if (!(flags & MSG_NOSIGNAL))
- send_sig(SIGPIPE, current, 0);
- goto out_err;
- }
-
- if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
- dst_negative_advice(sk);
-
- mss = scp->segsize_rem;
- fctype = scp->services_rem & NSP_FC_MASK;
-
- mss = dn_current_mss(sk, flags);
-
- if (flags & MSG_OOB) {
- queue = &scp->other_xmit_queue;
- if (size > mss) {
- err = -EMSGSIZE;
- goto out;
- }
- }
-
- scp->persist_fxn = dn_nsp_xmit_timeout;
-
- while(sent < size) {
- err = sock_error(sk);
- if (err)
- goto out;
-
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto out;
- }
-
- /*
- * Calculate size that we wish to send.
- */
- len = size - sent;
-
- if (len > mss)
- len = mss;
-
- /*
- * Wait for queue size to go down below the window
- * size.
- */
- if (dn_queue_too_long(scp, queue, flags)) {
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- if (flags & MSG_DONTWAIT) {
- err = -EWOULDBLOCK;
- goto out;
- }
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo,
- !dn_queue_too_long(scp, queue, flags), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- continue;
- }
-
- /*
- * Get a suitably sized skb.
- * 64 is a bit of a hack really, but its larger than any
- * link-layer headers and has served us well as a good
- * guess as to their real length.
- */
- skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER,
- flags & MSG_DONTWAIT, &err);
-
- if (err)
- break;
-
- if (!skb)
- continue;
-
- cb = DN_SKB_CB(skb);
-
- skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER);
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
- goto out;
- }
-
- if (flags & MSG_OOB) {
- cb->nsp_flags = 0x30;
- if (fctype != NSP_FC_NONE)
- scp->flowrem_oth--;
- } else {
- cb->nsp_flags = 0x00;
- if (scp->seg_total == 0)
- cb->nsp_flags |= 0x20;
-
- scp->seg_total += len;
-
- if (((sent + len) == size) && (flags & MSG_EOR)) {
- cb->nsp_flags |= 0x40;
- scp->seg_total = 0;
- if (fctype == NSP_FC_SCMC)
- scp->flowrem_dat--;
- }
- if (fctype == NSP_FC_SRC)
- scp->flowrem_dat--;
- }
-
- sent += len;
- dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB);
- skb = NULL;
-
- scp->persist = dn_nsp_persist(sk);
-
- }
-out:
-
- kfree_skb(skb);
-
- release_sock(sk);
-
- return sent ? sent : err;
-
-out_err:
- err = sk_stream_error(sk, flags, err);
- release_sock(sk);
- return err;
-}
-
-static int dn_device_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- dn_dev_up(dev);
- break;
- case NETDEV_DOWN:
- dn_dev_down(dev);
- break;
- default:
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block dn_dev_notifier = {
- .notifier_call = dn_device_event,
-};
-
-static struct packet_type dn_dix_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_DNA_RT),
- .func = dn_route_rcv,
-};
-
-#ifdef CONFIG_PROC_FS
-struct dn_iter_state {
- int bucket;
-};
-
-static struct sock *dn_socket_get_first(struct seq_file *seq)
-{
- struct dn_iter_state *state = seq->private;
- struct sock *n = NULL;
-
- for(state->bucket = 0;
- state->bucket < DN_SK_HASH_SIZE;
- ++state->bucket) {
- n = sk_head(&dn_sk_hash[state->bucket]);
- if (n)
- break;
- }
-
- return n;
-}
-
-static struct sock *dn_socket_get_next(struct seq_file *seq,
- struct sock *n)
-{
- struct dn_iter_state *state = seq->private;
-
- n = sk_next(n);
- while (!n) {
- if (++state->bucket >= DN_SK_HASH_SIZE)
- break;
- n = sk_head(&dn_sk_hash[state->bucket]);
- }
- return n;
-}
-
-static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos)
-{
- struct sock *sk = dn_socket_get_first(seq);
-
- if (sk) {
- while(*pos && (sk = dn_socket_get_next(seq, sk)))
- --*pos;
- }
- return *pos ? NULL : sk;
-}
-
-static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos)
-{
- void *rc;
- read_lock_bh(&dn_hash_lock);
- rc = socket_get_idx(seq, &pos);
- if (!rc) {
- read_unlock_bh(&dn_hash_lock);
- }
- return rc;
-}
-
-static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- void *rc;
-
- if (v == SEQ_START_TOKEN) {
- rc = dn_socket_get_idx(seq, 0);
- goto out;
- }
-
- rc = dn_socket_get_next(seq, v);
- if (rc)
- goto out;
- read_unlock_bh(&dn_hash_lock);
-out:
- ++*pos;
- return rc;
-}
-
-static void dn_socket_seq_stop(struct seq_file *seq, void *v)
-{
- if (v && v != SEQ_START_TOKEN)
- read_unlock_bh(&dn_hash_lock);
-}
-
-#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126)
-
-static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf)
-{
- int i;
-
- switch (le16_to_cpu(dn->sdn_objnamel)) {
- case 0:
- sprintf(buf, "%d", dn->sdn_objnum);
- break;
- default:
- for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) {
- buf[i] = dn->sdn_objname[i];
- if (IS_NOT_PRINTABLE(buf[i]))
- buf[i] = '.';
- }
- buf[i] = 0;
- }
-}
-
-static char *dn_state2asc(unsigned char state)
-{
- switch (state) {
- case DN_O:
- return "OPEN";
- case DN_CR:
- return " CR";
- case DN_DR:
- return " DR";
- case DN_DRC:
- return " DRC";
- case DN_CC:
- return " CC";
- case DN_CI:
- return " CI";
- case DN_NR:
- return " NR";
- case DN_NC:
- return " NC";
- case DN_CD:
- return " CD";
- case DN_RJ:
- return " RJ";
- case DN_RUN:
- return " RUN";
- case DN_DI:
- return " DI";
- case DN_DIC:
- return " DIC";
- case DN_DN:
- return " DN";
- case DN_CL:
- return " CL";
- case DN_CN:
- return " CN";
- }
-
- return "????";
-}
-
-static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- char buf1[DN_ASCBUF_LEN];
- char buf2[DN_ASCBUF_LEN];
- char local_object[DN_MAXOBJL+3];
- char remote_object[DN_MAXOBJL+3];
-
- dn_printable_object(&scp->addr, local_object);
- dn_printable_object(&scp->peer, remote_object);
-
- seq_printf(seq,
- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s "
- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n",
- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1),
- scp->addrloc,
- scp->numdat,
- scp->numoth,
- scp->ackxmt_dat,
- scp->ackxmt_oth,
- scp->flowloc_sw,
- local_object,
- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2),
- scp->addrrem,
- scp->numdat_rcv,
- scp->numoth_rcv,
- scp->ackrcv_dat,
- scp->ackrcv_oth,
- scp->flowrem_sw,
- remote_object,
- dn_state2asc(scp->state),
- ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER"));
-}
-
-static int dn_socket_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Local Remote\n");
- } else {
- dn_socket_format_entry(seq, v);
- }
- return 0;
-}
-
-static const struct seq_operations dn_socket_seq_ops = {
- .start = dn_socket_seq_start,
- .next = dn_socket_seq_next,
- .stop = dn_socket_seq_stop,
- .show = dn_socket_seq_show,
-};
-#endif
-
-static const struct net_proto_family dn_family_ops = {
- .family = AF_DECnet,
- .create = dn_create,
- .owner = THIS_MODULE,
-};
-
-static const struct proto_ops dn_proto_ops = {
- .family = AF_DECnet,
- .owner = THIS_MODULE,
- .release = dn_release,
- .bind = dn_bind,
- .connect = dn_connect,
- .socketpair = sock_no_socketpair,
- .accept = dn_accept,
- .getname = dn_getname,
- .poll = dn_poll,
- .ioctl = dn_ioctl,
- .listen = dn_listen,
- .shutdown = dn_shutdown,
- .setsockopt = dn_setsockopt,
- .getsockopt = dn_getsockopt,
- .sendmsg = dn_sendmsg,
- .recvmsg = dn_recvmsg,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
-MODULE_AUTHOR("Linux DECnet Project Team");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_DECnet);
-
-static const char banner[] __initconst = KERN_INFO
-"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
-
-static int __init decnet_init(void)
-{
- int rc;
-
- printk(banner);
-
- rc = proto_register(&dn_proto, 1);
- if (rc != 0)
- goto out;
-
- dn_neigh_init();
- dn_dev_init();
- dn_route_init();
- dn_fib_init();
-
- sock_register(&dn_family_ops);
- dev_add_pack(&dn_dix_packet_type);
- register_netdevice_notifier(&dn_dev_notifier);
-
- proc_create_seq_private("decnet", 0444, init_net.proc_net,
- &dn_socket_seq_ops, sizeof(struct dn_iter_state),
- NULL);
- dn_register_sysctl();
-out:
- return rc;
-
-}
-module_init(decnet_init);
-
-/*
- * Prevent DECnet module unloading until its fixed properly.
- * Requires an audit of the code to check for memory leaks and
- * initialisation problems etc.
- */
-#if 0
-static void __exit decnet_exit(void)
-{
- sock_unregister(AF_DECnet);
- rtnl_unregister_all(PF_DECnet);
- dev_remove_pack(&dn_dix_packet_type);
-
- dn_unregister_sysctl();
-
- unregister_netdevice_notifier(&dn_dev_notifier);
-
- dn_route_cleanup();
- dn_dev_cleanup();
- dn_neigh_cleanup();
- dn_fib_cleanup();
-
- remove_proc_entry("decnet", init_net.proc_net);
-
- proto_unregister(&dn_proto);
-
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
-}
-module_exit(decnet_exit);
-#endif
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
deleted file mode 100644
index a09ba642b5e7..000000000000
--- a/net/decnet/dn_dev.c
+++ /dev/null
@@ -1,1433 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Device Layer
- *
- * Authors: Steve Whitehouse <SteveW@ACM.org>
- * Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- * Steve Whitehouse : Devices now see incoming frames so they
- * can mark on who it came from.
- * Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour
- * can now have a device specific setup func.
- * Steve Whitehouse : Added /proc/sys/net/decnet/conf/<dev>/
- * Steve Whitehouse : Fixed bug which sometimes killed timer
- * Steve Whitehouse : Multiple ifaddr support
- * Steve Whitehouse : SIOCGIFCONF is now a compile time option
- * Steve Whitehouse : /proc/sys/net/decnet/conf/<sys>/forwarding
- * Steve Whitehouse : Removed timer1 - it's a user space issue now
- * Patrick Caulfield : Fixed router hello message format
- * Steve Whitehouse : Got rid of constant sizes for blksize for
- * devices. All mtu based now.
- */
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/if_addr.h>
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/skbuff.h>
-#include <linux/sysctl.h>
-#include <linux/notifier.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/uaccess.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/netlink.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-#include <net/dn_neigh.h>
-#include <net/dn_fib.h>
-
-#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
-
-static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
-static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};
-static char dn_hiord[ETH_ALEN] = {0xAA,0x00,0x04,0x00,0x00,0x00};
-static unsigned char dn_eco_version[3] = {0x02,0x00,0x00};
-
-extern struct neigh_table dn_neigh_table;
-
-/*
- * decnet_address is kept in network order.
- */
-__le16 decnet_address = 0;
-
-static DEFINE_SPINLOCK(dndev_lock);
-static struct net_device *decnet_default_device;
-static BLOCKING_NOTIFIER_HEAD(dnaddr_chain);
-
-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
-static void dn_dev_delete(struct net_device *dev);
-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa);
-
-static int dn_eth_up(struct net_device *);
-static void dn_eth_down(struct net_device *);
-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa);
-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa);
-
-static struct dn_dev_parms dn_dev_list[] = {
-{
- .type = ARPHRD_ETHER, /* Ethernet */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ethernet",
- .up = dn_eth_up,
- .down = dn_eth_down,
- .timer3 = dn_send_brd_hello,
-},
-{
- .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ipgre",
- .timer3 = dn_send_brd_hello,
-},
-#if 0
-{
- .type = ARPHRD_X25, /* Bog standard X.25 */
- .mode = DN_DEV_UCAST,
- .state = DN_DEV_S_DS,
- .t2 = 1,
- .t3 = 120,
- .name = "x25",
- .timer3 = dn_send_ptp_hello,
-},
-#endif
-#if 0
-{
- .type = ARPHRD_PPP, /* DECnet over PPP */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ppp",
- .timer3 = dn_send_brd_hello,
-},
-#endif
-{
- .type = ARPHRD_DDCMP, /* DECnet over DDCMP */
- .mode = DN_DEV_UCAST,
- .state = DN_DEV_S_DS,
- .t2 = 1,
- .t3 = 120,
- .name = "ddcmp",
- .timer3 = dn_send_ptp_hello,
-},
-{
- .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "loopback",
- .timer3 = dn_send_brd_hello,
-}
-};
-
-#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list)
-
-#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x)
-
-#ifdef CONFIG_SYSCTL
-
-static int min_t2[] = { 1 };
-static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */
-static int min_t3[] = { 1 };
-static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */
-
-static int min_priority[1];
-static int max_priority[] = { 127 }; /* From DECnet spec */
-
-static int dn_forwarding_proc(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-static struct dn_dev_sysctl_table {
- struct ctl_table_header *sysctl_header;
- struct ctl_table dn_dev_vars[5];
-} dn_dev_sysctl = {
- NULL,
- {
- {
- .procname = "forwarding",
- .data = (void *)DN_DEV_PARMS_OFFSET(forwarding),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = dn_forwarding_proc,
- },
- {
- .procname = "priority",
- .data = (void *)DN_DEV_PARMS_OFFSET(priority),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_priority,
- .extra2 = &max_priority
- },
- {
- .procname = "t2",
- .data = (void *)DN_DEV_PARMS_OFFSET(t2),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_t2,
- .extra2 = &max_t2
- },
- {
- .procname = "t3",
- .data = (void *)DN_DEV_PARMS_OFFSET(t3),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_t3,
- .extra2 = &max_t3
- },
- { }
- },
-};
-
-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
-{
- struct dn_dev_sysctl_table *t;
- int i;
-
- char path[sizeof("net/decnet/conf/") + IFNAMSIZ];
-
- t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
- if (t == NULL)
- return;
-
- for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
- long offset = (long)t->dn_dev_vars[i].data;
- t->dn_dev_vars[i].data = ((char *)parms) + offset;
- }
-
- snprintf(path, sizeof(path), "net/decnet/conf/%s",
- dev? dev->name : parms->name);
-
- t->dn_dev_vars[0].extra1 = (void *)dev;
-
- t->sysctl_header = register_net_sysctl(&init_net, path, t->dn_dev_vars);
- if (t->sysctl_header == NULL)
- kfree(t);
- else
- parms->sysctl = t;
-}
-
-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
-{
- if (parms->sysctl) {
- struct dn_dev_sysctl_table *t = parms->sysctl;
- parms->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
- }
-}
-
-static int dn_forwarding_proc(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
-#ifdef CONFIG_DECNET_ROUTER
- struct net_device *dev = table->extra1;
- struct dn_dev *dn_db;
- int err;
- int tmp, old;
-
- if (table->extra1 == NULL)
- return -EINVAL;
-
- dn_db = rcu_dereference_raw(dev->dn_ptr);
- old = dn_db->parms.forwarding;
-
- err = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if ((err >= 0) && write) {
- if (dn_db->parms.forwarding < 0)
- dn_db->parms.forwarding = 0;
- if (dn_db->parms.forwarding > 2)
- dn_db->parms.forwarding = 2;
- /*
-			 * What an ugly hack this is... it works, just. It
- * would be nice if sysctl/proc were just that little
- * bit more flexible so I don't have to write a special
- * routine, or suffer hacks like this - SJW
- */
- tmp = dn_db->parms.forwarding;
- dn_db->parms.forwarding = old;
- if (dn_db->parms.down)
- dn_db->parms.down(dev);
- dn_db->parms.forwarding = tmp;
- if (dn_db->parms.up)
- dn_db->parms.up(dev);
- }
-
- return err;
-#else
- return -EINVAL;
-#endif
-}
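dn_forwarding_proc() applies a new forwarding mode by clamping it to 0..2 and then cycling the device's down()/up() hooks, so the interface leaves the multicast group tied to the old mode and joins the one for the new mode. A minimal userspace sketch of that clamp-and-reapply pattern, with toy callbacks standing in for dn_eth_down()/dn_eth_up():

#include <stdio.h>

struct toy_parms {
	int forwarding;		/* 0 = endnode, 1 = level 1 router, 2 = level 2 router */
	void (*down)(const struct toy_parms *);
	void (*up)(const struct toy_parms *);
};

static void toy_down(const struct toy_parms *p)
{
	printf("leave multicast group for mode %d\n", p->forwarding);
}

static void toy_up(const struct toy_parms *p)
{
	printf("join multicast group for mode %d\n", p->forwarding);
}

/* Clamp the requested mode and re-run down()/up() so the multicast
 * membership matches the new setting, as dn_forwarding_proc() does. */
static void set_forwarding(struct toy_parms *p, int requested)
{
	int val = requested < 0 ? 0 : requested > 2 ? 2 : requested;

	p->down(p);		/* leave the group chosen by the old mode */
	p->forwarding = val;
	p->up(p);		/* join the group chosen by the new mode */
}

int main(void)
{
	struct toy_parms p = { .forwarding = 0, .down = toy_down, .up = toy_up };

	set_forwarding(&p, 5);	/* clamped to 2 */
	return 0;
}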
-
-#else /* CONFIG_SYSCTL */
-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
-{
-}
-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
-{
-}
-
-#endif /* CONFIG_SYSCTL */
-
-static inline __u16 mtu2blksize(struct net_device *dev)
-{
- u32 blksize = dev->mtu;
- if (blksize > 0xffff)
- blksize = 0xffff;
-
- if (dev->type == ARPHRD_ETHER ||
- dev->type == ARPHRD_PPP ||
- dev->type == ARPHRD_IPGRE ||
- dev->type == ARPHRD_LOOPBACK)
- blksize -= 2;
-
- return (__u16)blksize;
-}
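mtu2blksize() caps the advertised block size at 16 bits and, for framings that carry DECnet's own 2-byte length word (Ethernet, PPP, GRE, loopback), subtracts those 2 bytes from the MTU. A minimal standalone sketch of the same arithmetic, with the ARPHRD type check reduced to a flag:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of mtu2blksize(): clamp to 16 bits, then drop the 2-byte
 * length word for Ethernet-like framings. */
static uint16_t blksize_from_mtu(uint32_t mtu, bool has_length_word)
{
	uint32_t blksize = mtu > 0xffff ? 0xffff : mtu;

	if (has_length_word)
		blksize -= 2;
	return (uint16_t)blksize;
}

int main(void)
{
	printf("%u\n", blksize_from_mtu(1500, true));	/* Ethernet MTU 1500 -> 1498 */
	printf("%u\n", blksize_from_mtu(576, false));	/* point-to-point link -> 576 */
	return 0;
}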
-
-static struct dn_ifaddr *dn_dev_alloc_ifa(void)
-{
- struct dn_ifaddr *ifa;
-
- ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
-
- return ifa;
-}
-
-static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
-{
- kfree_rcu(ifa, rcu);
-}
-
-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
-{
- struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
- unsigned char mac_addr[6];
- struct net_device *dev = dn_db->dev;
-
- ASSERT_RTNL();
-
- *ifap = ifa1->ifa_next;
-
- if (dn_db->dev->type == ARPHRD_ETHER) {
- if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
- dn_dn2eth(mac_addr, ifa1->ifa_local);
- dev_mc_del(dev, mac_addr);
- }
- }
-
- dn_ifaddr_notify(RTM_DELADDR, ifa1);
- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
- if (destroy) {
- dn_dev_free_ifa(ifa1);
-
- if (dn_db->ifa_list == NULL)
- dn_dev_delete(dn_db->dev);
- }
-}
-
-static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
-{
- struct net_device *dev = dn_db->dev;
- struct dn_ifaddr *ifa1;
- unsigned char mac_addr[6];
-
- ASSERT_RTNL();
-
- /* Check for duplicates */
- for (ifa1 = rtnl_dereference(dn_db->ifa_list);
- ifa1 != NULL;
- ifa1 = rtnl_dereference(ifa1->ifa_next)) {
- if (ifa1->ifa_local == ifa->ifa_local)
- return -EEXIST;
- }
-
- if (dev->type == ARPHRD_ETHER) {
- if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
- dn_dn2eth(mac_addr, ifa->ifa_local);
- dev_mc_add(dev, mac_addr);
- }
- }
-
- ifa->ifa_next = dn_db->ifa_list;
- rcu_assign_pointer(dn_db->ifa_list, ifa);
-
- dn_ifaddr_notify(RTM_NEWADDR, ifa);
- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
-
- return 0;
-}
-
-static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
- int rv;
-
- if (dn_db == NULL) {
- int err;
- dn_db = dn_dev_create(dev, &err);
- if (dn_db == NULL)
- return err;
- }
-
- ifa->ifa_dev = dn_db;
-
- if (dev->flags & IFF_LOOPBACK)
- ifa->ifa_scope = RT_SCOPE_HOST;
-
- rv = dn_dev_insert_ifa(dn_db, ifa);
- if (rv)
- dn_dev_free_ifa(ifa);
- return rv;
-}
-
-
-int dn_dev_ioctl(unsigned int cmd, void __user *arg)
-{
- char buffer[DN_IFREQ_SIZE];
- struct ifreq *ifr = (struct ifreq *)buffer;
- struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
- struct dn_dev *dn_db;
- struct net_device *dev;
- struct dn_ifaddr *ifa = NULL;
- struct dn_ifaddr __rcu **ifap = NULL;
- int ret = 0;
-
- if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
- return -EFAULT;
- ifr->ifr_name[IFNAMSIZ-1] = 0;
-
- dev_load(&init_net, ifr->ifr_name);
-
- switch (cmd) {
- case SIOCGIFADDR:
- break;
- case SIOCSIFADDR:
- if (!capable(CAP_NET_ADMIN))
- return -EACCES;
- if (sdn->sdn_family != AF_DECnet)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- rtnl_lock();
-
- if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) {
- ret = -ENODEV;
- goto done;
- }
-
- if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
- for (ifap = &dn_db->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
- ifap = &ifa->ifa_next)
- if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
- break;
- }
-
- if (ifa == NULL && cmd != SIOCSIFADDR) {
- ret = -EADDRNOTAVAIL;
- goto done;
- }
-
- switch (cmd) {
- case SIOCGIFADDR:
- *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local;
- if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
- ret = -EFAULT;
- break;
-
- case SIOCSIFADDR:
- if (!ifa) {
- if ((ifa = dn_dev_alloc_ifa()) == NULL) {
- ret = -ENOBUFS;
- break;
- }
- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
- } else {
- if (ifa->ifa_local == dn_saddr2dn(sdn))
- break;
- dn_dev_del_ifa(dn_db, ifap, 0);
- }
-
- ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn);
-
- ret = dn_dev_set_ifa(dev, ifa);
- }
-done:
- rtnl_unlock();
-
- return ret;
-}
-
-struct net_device *dn_dev_get_default(void)
-{
- struct net_device *dev;
-
- spin_lock(&dndev_lock);
- dev = decnet_default_device;
- if (dev) {
- if (dev->dn_ptr)
- dev_hold(dev);
- else
- dev = NULL;
- }
- spin_unlock(&dndev_lock);
-
- return dev;
-}
-
-int dn_dev_set_default(struct net_device *dev, int force)
-{
- struct net_device *old = NULL;
- int rv = -EBUSY;
- if (!dev->dn_ptr)
- return -ENODEV;
-
- spin_lock(&dndev_lock);
- if (force || decnet_default_device == NULL) {
- old = decnet_default_device;
- decnet_default_device = dev;
- rv = 0;
- }
- spin_unlock(&dndev_lock);
-
- dev_put(old);
- return rv;
-}
-
-static void dn_dev_check_default(struct net_device *dev)
-{
- spin_lock(&dndev_lock);
- if (dev == decnet_default_device) {
- decnet_default_device = NULL;
- } else {
- dev = NULL;
- }
- spin_unlock(&dndev_lock);
-
- dev_put(dev);
-}
-
-/*
- * Called with RTNL
- */
-static struct dn_dev *dn_dev_by_index(int ifindex)
-{
- struct net_device *dev;
- struct dn_dev *dn_dev = NULL;
-
- dev = __dev_get_by_index(&init_net, ifindex);
- if (dev)
- dn_dev = rtnl_dereference(dev->dn_ptr);
-
- return dn_dev;
-}
-
-static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
- [IFA_ADDRESS] = { .type = NLA_U16 },
- [IFA_LOCAL] = { .type = NLA_U16 },
- [IFA_LABEL] = { .type = NLA_STRING,
- .len = IFNAMSIZ - 1 },
- [IFA_FLAGS] = { .type = NLA_U32 },
-};
-
-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr *tb[IFA_MAX+1];
- struct dn_dev *dn_db;
- struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa;
- struct dn_ifaddr __rcu **ifap;
- int err = -EINVAL;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- goto errout;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- dn_ifa_policy, extack);
- if (err < 0)
- goto errout;
-
- err = -ENODEV;
- ifm = nlmsg_data(nlh);
- if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
- goto errout;
-
- err = -EADDRNOTAVAIL;
- for (ifap = &dn_db->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
- ifap = &ifa->ifa_next) {
- if (tb[IFA_LOCAL] &&
- nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
- continue;
-
- if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
- continue;
-
- dn_dev_del_ifa(dn_db, ifap, 1);
- return 0;
- }
-
-errout:
- return err;
-}
-
-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr *tb[IFA_MAX+1];
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa;
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- dn_ifa_policy, extack);
- if (err < 0)
- return err;
-
- if (tb[IFA_LOCAL] == NULL)
- return -EINVAL;
-
- ifm = nlmsg_data(nlh);
- if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
- return -ENODEV;
-
- if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
- dn_db = dn_dev_create(dev, &err);
- if (!dn_db)
- return err;
- }
-
- if ((ifa = dn_dev_alloc_ifa()) == NULL)
- return -ENOBUFS;
-
- if (tb[IFA_ADDRESS] == NULL)
- tb[IFA_ADDRESS] = tb[IFA_LOCAL];
-
- ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]);
- ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]);
- ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
- ifm->ifa_flags;
- ifa->ifa_scope = ifm->ifa_scope;
- ifa->ifa_dev = dn_db;
-
- if (tb[IFA_LABEL])
- nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
- else
- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
-
- err = dn_dev_insert_ifa(dn_db, ifa);
- if (err)
- dn_dev_free_ifa(ifa);
-
- return err;
-}
-
-static inline size_t dn_ifaddr_nlmsg_size(void)
-{
- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
- + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
- + nla_total_size(2) /* IFA_ADDRESS */
- + nla_total_size(2) /* IFA_LOCAL */
- + nla_total_size(4); /* IFA_FLAGS */
-}
-
-static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
- u32 portid, u32 seq, int event, unsigned int flags)
-{
- struct ifaddrmsg *ifm;
- struct nlmsghdr *nlh;
- u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
- if (nlh == NULL)
- return -EMSGSIZE;
-
- ifm = nlmsg_data(nlh);
- ifm->ifa_family = AF_DECnet;
- ifm->ifa_prefixlen = 16;
- ifm->ifa_flags = ifa_flags;
- ifm->ifa_scope = ifa->ifa_scope;
- ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
-
- if ((ifa->ifa_address &&
- nla_put_le16(skb, IFA_ADDRESS, ifa->ifa_address)) ||
- (ifa->ifa_local &&
- nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) ||
- (ifa->ifa_label[0] &&
- nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
- nla_put_u32(skb, IFA_FLAGS, ifa_flags))
- goto nla_put_failure;
- nlmsg_end(skb, nlh);
- return 0;
-
-nla_put_failure:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
-{
- struct sk_buff *skb;
- int err = -ENOBUFS;
-
- skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL);
- if (skb == NULL)
- goto errout;
-
- err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
- return;
-errout:
- if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
-}
-
-static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- int idx, dn_idx = 0, skip_ndevs, skip_naddr;
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- skip_ndevs = cb->args[0];
- skip_naddr = cb->args[1];
-
- idx = 0;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (idx < skip_ndevs)
- goto cont;
- else if (idx > skip_ndevs) {
- /* Only skip over addresses for first dev dumped
- * in this iteration (idx == skip_ndevs) */
- skip_naddr = 0;
- }
-
- if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
- goto cont;
-
- for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
- ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
- if (dn_idx < skip_naddr)
- continue;
-
- if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWADDR,
- NLM_F_MULTI) < 0)
- goto done;
- }
-cont:
- idx++;
- }
-done:
- rcu_read_unlock();
- cb->args[0] = idx;
- cb->args[1] = dn_idx;
-
- return skb->len;
-}
-
-static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
-{
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int rv = -ENODEV;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL)
- goto out;
-
- ifa = rcu_dereference(dn_db->ifa_list);
- if (ifa != NULL) {
- *addr = ifa->ifa_local;
- rv = 0;
- }
-out:
- rcu_read_unlock();
- return rv;
-}
-
-/*
- * Find a default address to bind to.
- *
- * This is one of those areas where the initial VMS concepts don't really
- * map onto the Linux concepts, and since we introduced multiple addresses
- * per interface we have to cope with slightly odd ways of finding out what
- * "our address" really is. Mostly it's not a problem; for this we just guess
- * a sensible default. Eventually the routing code will take care of all the
- * nasties for us I hope.
- */
-int dn_dev_bind_default(__le16 *addr)
-{
- struct net_device *dev;
- int rv;
- dev = dn_dev_get_default();
-last_chance:
- if (dev) {
- rv = dn_dev_get_first(dev, addr);
- dev_put(dev);
- if (rv == 0 || dev == init_net.loopback_dev)
- return rv;
- }
- dev = init_net.loopback_dev;
- dev_hold(dev);
- goto last_chance;
-}
-
-static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct endnode_hello_message *msg;
- struct sk_buff *skb = NULL;
- __le16 *pktlen;
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
- return;
-
- skb->dev = dev;
-
- msg = skb_put(skb, sizeof(*msg));
-
- msg->msgflg = 0x0D;
- memcpy(msg->tiver, dn_eco_version, 3);
- dn_dn2eth(msg->id, ifa->ifa_local);
- msg->iinfo = DN_RT_INFO_ENDN;
- msg->blksize = cpu_to_le16(mtu2blksize(dev));
- msg->area = 0x00;
- memset(msg->seed, 0, 8);
- memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
-
- if (dn_db->router) {
- struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
- dn_dn2eth(msg->neighbor, dn->addr);
- }
-
- msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3);
- msg->mpd = 0x00;
- msg->datalen = 0x02;
- memset(msg->data, 0xAA, 2);
-
- pktlen = skb_push(skb, 2);
- *pktlen = cpu_to_le16(skb->len - 2);
-
- skb_reset_network_header(skb);
-
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
-}
-
-
-#define DRDELAY (5 * HZ)
-
-static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa)
-{
- /* First check time since device went up */
- if (time_before(jiffies, dn_db->uptime + DRDELAY))
- return 0;
-
- /* If there is no router, then yes... */
- if (!dn_db->router)
- return 1;
-
- /* otherwise only if we have a higher priority or.. */
- if (dn->priority < dn_db->parms.priority)
- return 1;
-
- /* if we have equal priority and a higher node number */
- if (dn->priority != dn_db->parms.priority)
- return 0;
-
- if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local))
- return 1;
-
- return 0;
-}
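dn_am_i_a_router() implements a simple designated-router election: hold off for DRDELAY after the interface comes up, claim the role if no router is known yet, otherwise win on strictly higher priority, and break priority ties in favour of the higher node address. A hedged standalone restatement of those rules (the DRDELAY uptime check is omitted):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct candidate {
	bool	 have_router;	/* is another router currently known? */
	uint8_t	 their_prio;
	uint16_t their_addr;	/* host-order node address of that router */
	uint8_t	 my_prio;
	uint16_t my_addr;
};

/* Same decision as dn_am_i_a_router(), minus the startup delay. */
static bool i_am_the_router(const struct candidate *c)
{
	if (!c->have_router)
		return true;
	if (c->their_prio < c->my_prio)
		return true;
	if (c->their_prio != c->my_prio)
		return false;
	return c->their_addr < c->my_addr;	/* tie broken by higher address */
}

int main(void)
{
	struct candidate c = {
		.have_router = true,
		.their_prio = 64, .their_addr = 0x0401,	/* 1.1 */
		.my_prio = 64,	  .my_addr = 0x0402,	/* 1.2 */
	};

	printf("%s\n", i_am_the_router(&c) ? "yes" : "no");	/* yes: equal priority, higher address */
	return 0;
}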
-
-static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- int n;
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
- struct sk_buff *skb;
- size_t size;
- unsigned char *ptr;
- unsigned char *i1, *i2;
- __le16 *pktlen;
- char *src;
-
- if (mtu2blksize(dev) < (26 + 7))
- return;
-
- n = mtu2blksize(dev) - 26;
- n /= 7;
-
- if (n > 32)
- n = 32;
-
- size = 2 + 26 + 7 * n;
-
- if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL)
- return;
-
- skb->dev = dev;
- ptr = skb_put(skb, size);
-
- *ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH;
- *ptr++ = 2; /* ECO */
- *ptr++ = 0;
- *ptr++ = 0;
- dn_dn2eth(ptr, ifa->ifa_local);
- src = ptr;
- ptr += ETH_ALEN;
- *ptr++ = dn_db->parms.forwarding == 1 ?
- DN_RT_INFO_L1RT : DN_RT_INFO_L2RT;
- *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev));
- ptr += 2;
- *ptr++ = dn_db->parms.priority; /* Priority */
- *ptr++ = 0; /* Area: Reserved */
- *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3);
- ptr += 2;
- *ptr++ = 0; /* MPD: Reserved */
- i1 = ptr++;
- memset(ptr, 0, 7); /* Name: Reserved */
- ptr += 7;
- i2 = ptr++;
-
- n = dn_neigh_elist(dev, ptr, n);
-
- *i2 = 7 * n;
- *i1 = 8 + *i2;
-
- skb_trim(skb, (27 + *i2));
-
- pktlen = skb_push(skb, 2);
- *pktlen = cpu_to_le16(skb->len - 2);
-
- skb_reset_network_header(skb);
-
- if (dn_am_i_a_router(dn, dn_db, ifa)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
- if (skb2) {
- dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src);
- }
- }
-
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
-}
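The hello sizing above reserves a 2-byte length word plus a 26-byte fixed header and packs up to 32 neighbour entries of 7 bytes each. Assuming a 1498-byte Ethernet block size, (1498 - 26) / 7 = 210 is capped at 32, giving a 2 + 26 + 7 * 32 = 252-byte allocation before skb_trim() shrinks it to the entries actually written. A compact restatement of that calculation:

#include <stdio.h>

/* Same sizing as dn_send_router_hello(): 26-byte fixed part, 2-byte
 * length word, 7 bytes per neighbour entry, at most 32 entries. */
static int router_hello_size(int blksize, int *entries)
{
	int n;

	if (blksize < 26 + 7)
		return -1;	/* too small to carry even one entry */
	n = (blksize - 26) / 7;
	if (n > 32)
		n = 32;
	*entries = n;
	return 2 + 26 + 7 * n;
}

int main(void)
{
	int n, size = router_hello_size(1498, &n);

	printf("%d entries, %d bytes allocated\n", n, size);	/* 32 entries, 252 bytes */
	return 0;
}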
-
-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dn_send_endnode_hello(dev, ifa);
- else
- dn_send_router_hello(dev, ifa);
-}
-
-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- int tdlen = 16;
- int size = dev->hard_header_len + 2 + 4 + tdlen;
- struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC);
- int i;
- unsigned char *ptr;
- char src[ETH_ALEN];
-
- if (skb == NULL)
- return ;
-
- skb->dev = dev;
- skb_push(skb, dev->hard_header_len);
- ptr = skb_put(skb, 2 + 4 + tdlen);
-
- *ptr++ = DN_RT_PKT_HELO;
- *((__le16 *)ptr) = ifa->ifa_local;
- ptr += 2;
- *ptr++ = tdlen;
-
- for(i = 0; i < tdlen; i++)
- *ptr++ = 0252;
-
- dn_dn2eth(src, ifa->ifa_local);
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
-}
-
-static int dn_eth_up(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dev_mc_add(dev, dn_rt_all_end_mcast);
- else
- dev_mc_add(dev, dn_rt_all_rt_mcast);
-
- dn_db->use_long = 1;
-
- return 0;
-}
-
-static void dn_eth_down(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dev_mc_del(dev, dn_rt_all_end_mcast);
- else
- dev_mc_del(dev, dn_rt_all_rt_mcast);
-}
-
-static void dn_dev_set_timer(struct net_device *dev);
-
-static void dn_dev_timer_func(struct timer_list *t)
-{
- struct dn_dev *dn_db = from_timer(dn_db, t, timer);
- struct net_device *dev;
- struct dn_ifaddr *ifa;
-
- rcu_read_lock();
- dev = dn_db->dev;
- if (dn_db->t3 <= dn_db->parms.t2) {
- if (dn_db->parms.timer3) {
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa;
- ifa = rcu_dereference(ifa->ifa_next)) {
- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
- dn_db->parms.timer3(dev, ifa);
- }
- }
- dn_db->t3 = dn_db->parms.t3;
- } else {
- dn_db->t3 -= dn_db->parms.t2;
- }
- rcu_read_unlock();
- dn_dev_set_timer(dev);
-}
-
-static void dn_dev_set_timer(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.t2 > dn_db->parms.t3)
- dn_db->parms.t2 = dn_db->parms.t3;
-
- dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
-
- add_timer(&dn_db->timer);
-}
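Together, dn_dev_timer_func() and dn_dev_set_timer() implement a coarse countdown: the timer ticks every t2 seconds, decrementing a t3 countdown, and a hello is sent (and the countdown reloaded) whenever the remainder would reach zero. With the Ethernet defaults of t2 = 1 and t3 = 10 from dn_dev_list, that means a hello roughly every 10 seconds. A minimal model of that countdown:

#include <stdio.h>

struct hello_timer {
	int t2;		/* tick interval in seconds */
	int t3;		/* hello interval in seconds */
	int remaining;	/* countdown, reloaded from t3 */
};

/* One t2-second tick; returns 1 when a hello should be sent,
 * mirroring the t3/t2 bookkeeping in dn_dev_timer_func(). */
static int tick(struct hello_timer *t)
{
	if (t->remaining <= t->t2) {
		t->remaining = t->t3;
		return 1;
	}
	t->remaining -= t->t2;
	return 0;
}

int main(void)
{
	struct hello_timer t = { .t2 = 1, .t3 = 10, .remaining = 10 };
	int sec;

	for (sec = 1; sec <= 25; sec++)
		if (tick(&t))
			printf("hello at %ds\n", sec);	/* roughly every 10 seconds */
	return 0;
}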
-
-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
-{
- int i;
- struct dn_dev_parms *p = dn_dev_list;
- struct dn_dev *dn_db;
-
- for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) {
- if (p->type == dev->type)
- break;
- }
-
- *err = -ENODEV;
- if (i == DN_DEV_LIST_SIZE)
- return NULL;
-
- *err = -ENOBUFS;
- if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
- return NULL;
-
- memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
-
- rcu_assign_pointer(dev->dn_ptr, dn_db);
- dn_db->dev = dev;
- timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
-
- dn_db->uptime = jiffies;
-
- dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
- if (!dn_db->neigh_parms) {
- RCU_INIT_POINTER(dev->dn_ptr, NULL);
- kfree(dn_db);
- return NULL;
- }
-
- if (dn_db->parms.up) {
- if (dn_db->parms.up(dev) < 0) {
- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
- dev->dn_ptr = NULL;
- kfree(dn_db);
- return NULL;
- }
- }
-
- dn_dev_sysctl_register(dev, &dn_db->parms);
-
- dn_dev_set_timer(dev);
-
- *err = 0;
- return dn_db;
-}
-
-
-/*
- * This processes a device up event. We only start up
- * the loopback device & ethernet devices with correct
- * MAC addresses automatically. Others must be started
- * specifically.
- *
- * FIXME: How should we configure the loopback address? If we could dispense
- * with using decnet_address here and for autobind, it would be one less thing
- * for users to worry about setting up.
- */
-
-void dn_dev_up(struct net_device *dev)
-{
- struct dn_ifaddr *ifa;
- __le16 addr = decnet_address;
- int maybe_default = 0;
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
-
- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
- return;
-
- /*
- * Need to ensure that loopback device has a dn_db attached to it
- * to allow creation of neighbours against it, even though it might
- * not have a local address of its own. Might as well do the same for
- * all autoconfigured interfaces.
- */
- if (dn_db == NULL) {
- int err;
- dn_db = dn_dev_create(dev, &err);
- if (dn_db == NULL)
- return;
- }
-
- if (dev->type == ARPHRD_ETHER) {
- if (memcmp(dev->dev_addr, dn_hiord, 4) != 0)
- return;
- addr = dn_eth2dn(dev->dev_addr);
- maybe_default = 1;
- }
-
- if (addr == 0)
- return;
-
- if ((ifa = dn_dev_alloc_ifa()) == NULL)
- return;
-
- ifa->ifa_local = ifa->ifa_address = addr;
- ifa->ifa_flags = 0;
- ifa->ifa_scope = RT_SCOPE_UNIVERSE;
- strcpy(ifa->ifa_label, dev->name);
-
- dn_dev_set_ifa(dev, ifa);
-
- /*
- * Automagically set the default device to the first automatically
- * configured ethernet card in the system.
- */
- if (maybe_default) {
- dev_hold(dev);
- if (dn_dev_set_default(dev, 0))
- dev_put(dev);
- }
-}
-
-static void dn_dev_delete(struct net_device *dev)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
-
- if (dn_db == NULL)
- return;
-
- del_timer_sync(&dn_db->timer);
- dn_dev_sysctl_unregister(&dn_db->parms);
- dn_dev_check_default(dev);
- neigh_ifdown(&dn_neigh_table, dev);
-
- if (dn_db->parms.down)
- dn_db->parms.down(dev);
-
- dev->dn_ptr = NULL;
-
- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
- neigh_ifdown(&dn_neigh_table, dev);
-
- if (dn_db->router)
- neigh_release(dn_db->router);
- if (dn_db->peer)
- neigh_release(dn_db->peer);
-
- kfree(dn_db);
-}
-
-void dn_dev_down(struct net_device *dev)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
- struct dn_ifaddr *ifa;
-
- if (dn_db == NULL)
- return;
-
- while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
- dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
- dn_dev_free_ifa(ifa);
- }
-
- dn_dev_delete(dev);
-}
-
-void dn_dev_init_pkt(struct sk_buff *skb)
-{
-}
-
-void dn_dev_veri_pkt(struct sk_buff *skb)
-{
-}
-
-void dn_dev_hello(struct sk_buff *skb)
-{
-}
-
-void dn_dev_devices_off(void)
-{
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev)
- dn_dev_down(dev);
- rtnl_unlock();
-
-}
-
-void dn_dev_devices_on(void)
-{
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- if (dev->flags & IFF_UP)
- dn_dev_up(dev);
- }
- rtnl_unlock();
-}
-
-int register_dnaddr_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&dnaddr_chain, nb);
-}
-
-int unregister_dnaddr_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&dnaddr_chain, nb);
-}
-
-#ifdef CONFIG_PROC_FS
-static inline int is_dn_dev(struct net_device *dev)
-{
- return dev->dn_ptr != NULL;
-}
-
-static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
-{
- int i;
- struct net_device *dev;
-
- rcu_read_lock();
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- i = 1;
- for_each_netdev_rcu(&init_net, dev) {
- if (!is_dn_dev(dev))
- continue;
-
- if (i++ == *pos)
- return dev;
- }
-
- return NULL;
-}
-
-static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct net_device *dev;
-
- ++*pos;
-
- dev = v;
- if (v == SEQ_START_TOKEN)
- dev = net_device_entry(&init_net.dev_base_head);
-
- for_each_netdev_continue_rcu(&init_net, dev) {
- if (!is_dn_dev(dev))
- continue;
-
- return dev;
- }
-
- return NULL;
-}
-
-static void dn_dev_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
-{
- rcu_read_unlock();
-}
-
-static char *dn_type2asc(char type)
-{
- switch (type) {
- case DN_DEV_BCAST:
- return "B";
- case DN_DEV_UCAST:
- return "U";
- case DN_DEV_MPOINT:
- return "M";
- }
-
- return "?";
-}
-
-static int dn_dev_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Name Flags T1 Timer1 T3 Timer3 BlkSize Pri State DevType Router Peer\n");
- else {
- struct net_device *dev = v;
- char peer_buf[DN_ASCBUF_LEN];
- char router_buf[DN_ASCBUF_LEN];
- struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
-
- seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
- " %04hu %03d %02x %-10s %-7s %-7s\n",
- dev->name,
- dn_type2asc(dn_db->parms.mode),
- 0, 0,
- dn_db->t3, dn_db->parms.t3,
- mtu2blksize(dev),
- dn_db->parms.priority,
- dn_db->parms.state, dn_db->parms.name,
- dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "",
- dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : "");
- }
- return 0;
-}
-
-static const struct seq_operations dn_dev_seq_ops = {
- .start = dn_dev_seq_start,
- .next = dn_dev_seq_next,
- .stop = dn_dev_seq_stop,
- .show = dn_dev_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-static int addr[2];
-module_param_array(addr, int, NULL, 0444);
-MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
-
-void __init dn_dev_init(void)
-{
- if (addr[0] > 63 || addr[0] < 0) {
- printk(KERN_ERR "DECnet: Area must be between 0 and 63");
- return;
- }
-
- if (addr[1] > 1023 || addr[1] < 0) {
- printk(KERN_ERR "DECnet: Node must be between 0 and 1023");
- return;
- }
-
- decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]);
-
- dn_dev_devices_on();
-
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
- dn_nl_newaddr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
- dn_nl_deladdr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
- NULL, dn_nl_dump_ifaddr, 0);
-
- proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops);
-
-#ifdef CONFIG_SYSCTL
- {
- int i;
- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
- dn_dev_sysctl_register(NULL, &dn_dev_list[i]);
- }
-#endif /* CONFIG_SYSCTL */
-}
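dn_dev_init() packs the addr=area,node module parameter into the 16-bit DECnet address: the area (0 to 63) occupies the top six bits and the node (0 to 1023) the low ten, and the result is stored little-endian via cpu_to_le16(). A standalone sketch of that validation and packing:

#include <stdint.h>
#include <stdio.h>

/* Pack area.node into the 16-bit DECnet address as dn_dev_init() does:
 * area in the top 6 bits, node in the low 10. Returns -1 on bad input. */
static int pack_dn_address(int area, int node, uint16_t *out)
{
	if (area < 0 || area > 63)
		return -1;
	if (node < 0 || node > 1023)
		return -1;
	*out = (uint16_t)((area << 10) | node);
	return 0;
}

int main(void)
{
	uint16_t addr;

	if (pack_dn_address(1, 2, &addr) == 0)
		printf("1.2 -> 0x%04x (area %u, node %u)\n",
		       addr, addr >> 10, addr & 0x3ff);	/* 0x0402 */
	return 0;
}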
-
-void __exit dn_dev_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
- {
- int i;
- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
- dn_dev_sysctl_unregister(&dn_dev_list[i]);
- }
-#endif /* CONFIG_SYSCTL */
-
- remove_proc_entry("decnet_dev", init_net.proc_net);
-
- dn_dev_devices_off();
-}
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
deleted file mode 100644
index 269c029ad74f..000000000000
--- a/net/decnet/dn_fib.c
+++ /dev/null
@@ -1,798 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Glue/Info List)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Alexey Kuznetsov : SMP locking changes
- * Steve Whitehouse : Rewrote it... Well to be more correct, I
- * copied most of it from the ipv4 fib code.
- * Steve Whitehouse : Updated it in style and fixed a few bugs
- * which were fixed in the ipv4 code since
- * this code was copied from it.
- *
- */
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/sockios.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-#include <net/rtnh.h>
-
-#define RT_MIN_TABLE 1
-
-#define for_fib_info() { struct dn_fib_info *fi;\
- for(fi = dn_fib_info_list; fi; fi = fi->fib_next)
-#define endfor_fib_info() }
-
-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define endfor_nexthops(fi) }
-
-static DEFINE_SPINLOCK(dn_fib_multipath_lock);
-static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_SPINLOCK(dn_fib_info_lock);
-
-static struct
-{
- int error;
- u8 scope;
-} dn_fib_props[RTN_MAX+1] = {
- [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
- [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE },
- [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST },
- [RTN_BROADCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_ANYCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_MULTICAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE },
- [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE },
- [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE },
- [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE },
- [RTN_NAT] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
- [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
-};
-
-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
-static int dn_fib_sync_up(struct net_device *dev);
-
-void dn_fib_free_info(struct dn_fib_info *fi)
-{
- if (fi->fib_dead == 0) {
- printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n");
- return;
- }
-
- change_nexthops(fi) {
- dev_put(nh->nh_dev);
- nh->nh_dev = NULL;
- } endfor_nexthops(fi);
- kfree(fi);
-}
-
-void dn_fib_release_info(struct dn_fib_info *fi)
-{
- spin_lock(&dn_fib_info_lock);
- if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
- if (fi->fib_next)
- fi->fib_next->fib_prev = fi->fib_prev;
- if (fi->fib_prev)
- fi->fib_prev->fib_next = fi->fib_next;
- if (fi == dn_fib_info_list)
- dn_fib_info_list = fi->fib_next;
- fi->fib_dead = 1;
- dn_fib_info_put(fi);
- }
- spin_unlock(&dn_fib_info_lock);
-}
-
-static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
-{
- const struct dn_fib_nh *onh = ofi->fib_nh;
-
- for_nexthops(fi) {
- if (nh->nh_oif != onh->nh_oif ||
- nh->nh_gw != onh->nh_gw ||
- nh->nh_scope != onh->nh_scope ||
- nh->nh_weight != onh->nh_weight ||
- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
- return -1;
- onh++;
- } endfor_nexthops(fi);
- return 0;
-}
-
-static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi)
-{
- for_fib_info() {
- if (fi->fib_nhs != nfi->fib_nhs)
- continue;
- if (nfi->fib_protocol == fi->fib_protocol &&
- nfi->fib_prefsrc == fi->fib_prefsrc &&
- nfi->fib_priority == fi->fib_priority &&
- memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
- (nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0))
- return fi;
- } endfor_fib_info();
- return NULL;
-}
-
-static int dn_fib_count_nhs(const struct nlattr *attr)
-{
- struct rtnexthop *nhp = nla_data(attr);
- int nhs = 0, nhlen = nla_len(attr);
-
- while (rtnh_ok(nhp, nhlen)) {
- nhs++;
- nhp = rtnh_next(nhp, &nhlen);
- }
-
- /* leftover implies invalid nexthop configuration, discard it */
- return nhlen > 0 ? 0 : nhs;
-}
-
-static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
- const struct rtmsg *r)
-{
- struct rtnexthop *nhp = nla_data(attr);
- int nhlen = nla_len(attr);
-
- change_nexthops(fi) {
- int attrlen;
-
- if (!rtnh_ok(nhp, nhlen))
- return -EINVAL;
-
- nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
- nh->nh_oif = nhp->rtnh_ifindex;
- nh->nh_weight = nhp->rtnh_hops + 1;
-
- attrlen = rtnh_attrlen(nhp);
- if (attrlen > 0) {
- struct nlattr *gw_attr;
-
- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
- nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
- }
-
- nhp = rtnh_next(nhp, &nhlen);
- } endfor_nexthops(fi);
-
- return 0;
-}
-
-
-static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh)
-{
- int err;
-
- if (nh->nh_gw) {
- struct flowidn fld;
- struct dn_fib_res res;
-
- if (nh->nh_flags&RTNH_F_ONLINK) {
- struct net_device *dev;
-
- if (r->rtm_scope >= RT_SCOPE_LINK)
- return -EINVAL;
- if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
- return -EINVAL;
- if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
- nh->nh_dev = dev;
- dev_hold(dev);
- nh->nh_scope = RT_SCOPE_LINK;
- return 0;
- }
-
- memset(&fld, 0, sizeof(fld));
- fld.daddr = nh->nh_gw;
- fld.flowidn_oif = nh->nh_oif;
- fld.flowidn_scope = r->rtm_scope + 1;
-
- if (fld.flowidn_scope < RT_SCOPE_LINK)
- fld.flowidn_scope = RT_SCOPE_LINK;
-
- if ((err = dn_fib_lookup(&fld, &res)) != 0)
- return err;
-
- err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
- goto out;
- nh->nh_scope = res.scope;
- nh->nh_oif = DN_FIB_RES_OIF(res);
- nh->nh_dev = DN_FIB_RES_DEV(res);
- if (nh->nh_dev == NULL)
- goto out;
- dev_hold(nh->nh_dev);
- err = -ENETDOWN;
- if (!(nh->nh_dev->flags & IFF_UP))
- goto out;
- err = 0;
-out:
- dn_fib_res_put(&res);
- return err;
- } else {
- struct net_device *dev;
-
- if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
- return -EINVAL;
-
- dev = __dev_get_by_index(&init_net, nh->nh_oif);
- if (dev == NULL || dev->dn_ptr == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
- nh->nh_dev = dev;
- dev_hold(nh->nh_dev);
- nh->nh_scope = RT_SCOPE_HOST;
- }
-
- return 0;
-}
-
-
-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
- const struct nlmsghdr *nlh, int *errp)
-{
- int err;
- struct dn_fib_info *fi = NULL;
- struct dn_fib_info *ofi;
- int nhs = 1;
-
- if (r->rtm_type > RTN_MAX)
- goto err_inval;
-
- if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
- goto err_inval;
-
- if (attrs[RTA_MULTIPATH] &&
- (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
- goto err_inval;
-
- fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
- err = -ENOBUFS;
- if (fi == NULL)
- goto failure;
-
- fi->fib_protocol = r->rtm_protocol;
- fi->fib_nhs = nhs;
- fi->fib_flags = r->rtm_flags;
-
- if (attrs[RTA_PRIORITY])
- fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
-
- if (attrs[RTA_METRICS]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
- int type = nla_type(attr);
-
- if (type) {
- if (type > RTAX_MAX || type == RTAX_CC_ALGO ||
- nla_len(attr) < 4)
- goto err_inval;
-
- fi->fib_metrics[type-1] = nla_get_u32(attr);
- }
- }
- }
-
- if (attrs[RTA_PREFSRC])
- fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
-
- if (attrs[RTA_MULTIPATH]) {
- if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
- goto failure;
-
- if (attrs[RTA_OIF] &&
- fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
- goto err_inval;
-
- if (attrs[RTA_GATEWAY] &&
- fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
- goto err_inval;
- } else {
- struct dn_fib_nh *nh = fi->fib_nh;
-
- if (attrs[RTA_OIF])
- nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
-
- if (attrs[RTA_GATEWAY])
- nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
-
- nh->nh_flags = r->rtm_flags;
- nh->nh_weight = 1;
- }
-
- if (r->rtm_type == RTN_NAT) {
- if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
- goto err_inval;
-
- fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
- goto link_it;
- }
-
- if (dn_fib_props[r->rtm_type].error) {
- if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
- goto err_inval;
-
- goto link_it;
- }
-
- if (r->rtm_scope > RT_SCOPE_HOST)
- goto err_inval;
-
- if (r->rtm_scope == RT_SCOPE_HOST) {
- struct dn_fib_nh *nh = fi->fib_nh;
-
- /* Local address is added */
- if (nhs != 1 || nh->nh_gw)
- goto err_inval;
- nh->nh_scope = RT_SCOPE_NOWHERE;
- nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
- err = -ENODEV;
- if (nh->nh_dev == NULL)
- goto failure;
- } else {
- change_nexthops(fi) {
- if ((err = dn_fib_check_nh(r, fi, nh)) != 0)
- goto failure;
- } endfor_nexthops(fi)
- }
-
- if (fi->fib_prefsrc) {
- if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
- fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
- if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
- goto err_inval;
- }
-
-link_it:
- if ((ofi = dn_fib_find_info(fi)) != NULL) {
- fi->fib_dead = 1;
- dn_fib_free_info(fi);
- refcount_inc(&ofi->fib_treeref);
- return ofi;
- }
-
- refcount_set(&fi->fib_treeref, 1);
- refcount_set(&fi->fib_clntref, 1);
- spin_lock(&dn_fib_info_lock);
- fi->fib_next = dn_fib_info_list;
- fi->fib_prev = NULL;
- if (dn_fib_info_list)
- dn_fib_info_list->fib_prev = fi;
- dn_fib_info_list = fi;
- spin_unlock(&dn_fib_info_lock);
- return fi;
-
-err_inval:
- err = -EINVAL;
-
-failure:
- *errp = err;
- if (fi) {
- fi->fib_dead = 1;
- dn_fib_free_info(fi);
- }
-
- return NULL;
-}
-
-int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res)
-{
- int err = dn_fib_props[type].error;
-
- if (err == 0) {
- if (fi->fib_flags & RTNH_F_DEAD)
- return 1;
-
- res->fi = fi;
-
- switch (type) {
- case RTN_NAT:
- DN_FIB_RES_RESET(*res);
- refcount_inc(&fi->fib_clntref);
- return 0;
- case RTN_UNICAST:
- case RTN_LOCAL:
- for_nexthops(fi) {
- if (nh->nh_flags & RTNH_F_DEAD)
- continue;
- if (!fld->flowidn_oif ||
- fld->flowidn_oif == nh->nh_oif)
- break;
- }
- if (nhsel < fi->fib_nhs) {
- res->nh_sel = nhsel;
- refcount_inc(&fi->fib_clntref);
- return 0;
- }
- endfor_nexthops(fi);
- res->fi = NULL;
- return 1;
- default:
- net_err_ratelimited("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n",
- type);
- res->fi = NULL;
- return -EINVAL;
- }
- }
- return err;
-}
-
-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
-{
- struct dn_fib_info *fi = res->fi;
- int w;
-
- spin_lock_bh(&dn_fib_multipath_lock);
- if (fi->fib_power <= 0) {
- int power = 0;
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- power += nh->nh_weight;
- nh->nh_power = nh->nh_weight;
- }
- } endfor_nexthops(fi);
- fi->fib_power = power;
- if (power < 0) {
- spin_unlock_bh(&dn_fib_multipath_lock);
- res->nh_sel = 0;
- return;
- }
- }
-
- w = jiffies % fi->fib_power;
-
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
- if ((w -= nh->nh_power) <= 0) {
- nh->nh_power--;
- fi->fib_power--;
- res->nh_sel = nhsel;
- spin_unlock_bh(&dn_fib_multipath_lock);
- return;
- }
- }
- } endfor_nexthops(fi);
- res->nh_sel = 0;
- spin_unlock_bh(&dn_fib_multipath_lock);
-}
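The multipath selector is a credit-based weighted round robin: each live nexthop is given "power" equal to its weight, a pseudo-random value in [0, total) picks a nexthop by walking the credits, and both the chosen nexthop's credit and the pool are decremented so that, over a full cycle, nexthops are used in proportion to their weights. A hedged userspace sketch of the same scheme, with rand() standing in for jiffies and the dead-nexthop handling left out:

#include <stdio.h>
#include <stdlib.h>

struct nexthop {
	int weight;
	int power;
};

/* Weighted selection in the style of dn_fib_select_multipath(): refill
 * credits from the weights when the pool is empty, then spend one. */
static int select_nexthop(struct nexthop *nh, int n, int *pool)
{
	int i, w;

	if (*pool <= 0) {
		*pool = 0;
		for (i = 0; i < n; i++) {
			nh[i].power = nh[i].weight;
			*pool += nh[i].weight;
		}
	}

	w = rand() % *pool;
	for (i = 0; i < n; i++) {
		if (nh[i].power && (w -= nh[i].power) <= 0) {
			nh[i].power--;
			(*pool)--;
			return i;
		}
	}
	return 0;	/* not reached while the pool stays consistent */
}

int main(void)
{
	struct nexthop nh[2] = { { .weight = 3 }, { .weight = 1 } };
	int pool = 0, picks[2] = { 0, 0 }, i;

	for (i = 0; i < 4000; i++)
		picks[select_nexthop(nh, 2, &pool)]++;
	printf("nh0=%d nh1=%d (expect a 3:1 split)\n", picks[0], picks[1]);
	return 0;
}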
-
-static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
-{
- if (attrs[RTA_TABLE])
- table = nla_get_u32(attrs[RTA_TABLE]);
-
- return table;
-}
-
-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_fib_table *tb;
- struct rtmsg *r = nlmsg_data(nlh);
- struct nlattr *attrs[RTA_MAX+1];
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
- rtm_dn_policy, extack);
- if (err < 0)
- return err;
-
- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
- if (!tb)
- return -ESRCH;
-
- return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
-}
-
-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_fib_table *tb;
- struct rtmsg *r = nlmsg_data(nlh);
- struct nlattr *attrs[RTA_MAX+1];
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
- rtm_dn_policy, extack);
- if (err < 0)
- return err;
-
- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
- if (!tb)
- return -ENOBUFS;
-
- return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
-}
-
-static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
-{
- struct dn_fib_table *tb;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
- struct {
- struct nlattr hdr;
- __le16 dst;
- } dst_attr = {
- .dst = dst,
- };
- struct {
- struct nlattr hdr;
- __le16 prefsrc;
- } prefsrc_attr = {
- .prefsrc = ifa->ifa_local,
- };
- struct {
- struct nlattr hdr;
- u32 oif;
- } oif_attr = {
- .oif = ifa->ifa_dev->dev->ifindex,
- };
- struct nlattr *attrs[RTA_MAX+1] = {
- [RTA_DST] = (struct nlattr *) &dst_attr,
- [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
- [RTA_OIF] = (struct nlattr *) &oif_attr,
- };
-
- memset(&req.rtm, 0, sizeof(req.rtm));
-
- if (type == RTN_UNICAST)
- tb = dn_fib_get_table(RT_MIN_TABLE, 1);
- else
- tb = dn_fib_get_table(RT_TABLE_LOCAL, 1);
-
- if (tb == NULL)
- return;
-
- req.nlh.nlmsg_len = sizeof(req);
- req.nlh.nlmsg_type = cmd;
- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
- req.nlh.nlmsg_pid = 0;
- req.nlh.nlmsg_seq = 0;
-
- req.rtm.rtm_dst_len = dst_len;
- req.rtm.rtm_table = tb->n;
- req.rtm.rtm_protocol = RTPROT_KERNEL;
- req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
- req.rtm.rtm_type = type;
-
- if (cmd == RTM_NEWROUTE)
- tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
- else
- tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
-}
-
-static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
-{
-
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
-
-#if 0
- if (!(dev->flags&IFF_UP))
- return;
- /* In the future, we will want to add default routes here */
-
-#endif
-}
-
-static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
-{
- int found_it = 0;
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa2;
-
- ASSERT_RTNL();
-
- /* Scan device list */
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL)
- continue;
- for (ifa2 = rcu_dereference(dn_db->ifa_list);
- ifa2 != NULL;
- ifa2 = rcu_dereference(ifa2->ifa_next)) {
- if (ifa2->ifa_local == ifa->ifa_local) {
- found_it = 1;
- break;
- }
- }
- }
- rcu_read_unlock();
-
- if (found_it == 0) {
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
-
- if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
- if (dn_fib_sync_down(ifa->ifa_local, NULL, 0))
- dn_fib_flush();
- }
- }
-}
-
-static void dn_fib_disable_addr(struct net_device *dev, int force)
-{
- if (dn_fib_sync_down(0, dev, force))
- dn_fib_flush();
- dn_rt_cache_flush(0);
- neigh_ifdown(&dn_neigh_table, dev);
-}
-
-static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr;
-
- switch (event) {
- case NETDEV_UP:
- dn_fib_add_ifaddr(ifa);
- dn_fib_sync_up(ifa->ifa_dev->dev);
- dn_rt_cache_flush(-1);
- break;
- case NETDEV_DOWN:
- dn_fib_del_ifaddr(ifa);
- if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
- dn_fib_disable_addr(ifa->ifa_dev->dev, 1);
- } else {
- dn_rt_cache_flush(-1);
- }
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
-{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
-
- if (force)
- scope = -1;
-
- for_fib_info() {
- /*
-		 * This makes no sense for DECnet: we will almost certainly
-		 * have the same local address on more than one of our
-		 * interfaces. It needs some more thought.
- */
- if (local && fi->fib_prefsrc == local) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- } else if (dev && fi->fib_nhs) {
- int dead = 0;
-
- change_nexthops(fi) {
- if (nh->nh_flags&RTNH_F_DEAD)
- dead++;
- else if (nh->nh_dev == dev &&
- nh->nh_scope != scope) {
- spin_lock_bh(&dn_fib_multipath_lock);
- nh->nh_flags |= RTNH_F_DEAD;
- fi->fib_power -= nh->nh_power;
- nh->nh_power = 0;
- spin_unlock_bh(&dn_fib_multipath_lock);
- dead++;
- }
- } endfor_nexthops(fi)
- if (dead == fi->fib_nhs) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- }
- }
- } endfor_fib_info();
- return ret;
-}
-
-
-static int dn_fib_sync_up(struct net_device *dev)
-{
- int ret = 0;
-
- if (!(dev->flags&IFF_UP))
- return 0;
-
- for_fib_info() {
- int alive = 0;
-
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- alive++;
- continue;
- }
- if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
- continue;
- if (nh->nh_dev != dev || dev->dn_ptr == NULL)
- continue;
- alive++;
- spin_lock_bh(&dn_fib_multipath_lock);
- nh->nh_power = 0;
- nh->nh_flags &= ~RTNH_F_DEAD;
- spin_unlock_bh(&dn_fib_multipath_lock);
- } endfor_nexthops(fi);
-
- if (alive > 0) {
- fi->fib_flags &= ~RTNH_F_DEAD;
- ret++;
- }
- } endfor_fib_info();
- return ret;
-}
-
-static struct notifier_block dn_fib_dnaddr_notifier = {
- .notifier_call = dn_fib_dnaddr_event,
-};
-
-void __exit dn_fib_cleanup(void)
-{
- dn_fib_table_cleanup();
- dn_fib_rules_cleanup();
-
- unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier);
-}
-
-
-void __init dn_fib_init(void)
-{
- dn_fib_table_init();
- dn_fib_rules_init();
-
- register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
-
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
- dn_fib_rtm_newroute, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
- dn_fib_rtm_delroute, NULL, 0);
-}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
deleted file mode 100644
index 7c569bcc0aca..000000000000
--- a/net/decnet/dn_neigh.c
+++ /dev/null
@@ -1,607 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Neighbour Functions (Adjacency Database and
- * On-Ethernet Cache)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse : Fixed router listing routine
- * Steve Whitehouse : Added error_report functions
- * Steve Whitehouse : Added default router detection
- * Steve Whitehouse : Hop counts in outgoing messages
- * Steve Whitehouse : Fixed src/dst in outgoing messages so
- * forwarding now stands a good chance of
- * working.
- * Steve Whitehouse : Fixed neighbour states (for now anyway).
- * Steve Whitehouse : Made error_report functions dummies. This
- * is not the right place to return skbs.
- * Steve Whitehouse : Convert to seq_file
- *
- */
-
-#include <linux/net.h>
-#include <linux/module.h>
-#include <linux/socket.h>
-#include <linux/if_arp.h>
-#include <linux/slab.h>
-#include <linux/if_ether.h>
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/string.h>
-#include <linux/netfilter_decnet.h>
-#include <linux/spinlock.h>
-#include <linux/seq_file.h>
-#include <linux/rcupdate.h>
-#include <linux/jhash.h>
-#include <linux/atomic.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_neigh.h>
-#include <net/dn_route.h>
-
-static int dn_neigh_construct(struct neighbour *);
-static void dn_neigh_error_report(struct neighbour *, struct sk_buff *);
-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb);
-
-/*
- * Operations for adding the link layer header.
- */
-static const struct neigh_ops dn_neigh_ops = {
- .family = AF_DECnet,
- .error_report = dn_neigh_error_report,
- .output = dn_neigh_output,
- .connected_output = dn_neigh_output,
-};
-
-static u32 dn_neigh_hash(const void *pkey,
- const struct net_device *dev,
- __u32 *hash_rnd)
-{
- return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
-}
-
-static bool dn_key_eq(const struct neighbour *neigh, const void *pkey)
-{
- return neigh_key_eq16(neigh, pkey);
-}
-
-struct neigh_table dn_neigh_table = {
- .family = PF_DECnet,
- .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
- .key_len = sizeof(__le16),
- .protocol = cpu_to_be16(ETH_P_DNA_RT),
- .hash = dn_neigh_hash,
- .key_eq = dn_key_eq,
- .constructor = dn_neigh_construct,
- .id = "dn_neigh_cache",
- .parms ={
- .tbl = &dn_neigh_table,
- .reachable_time = 30 * HZ,
- .data = {
- [NEIGH_VAR_MCAST_PROBES] = 0,
- [NEIGH_VAR_UCAST_PROBES] = 0,
- [NEIGH_VAR_APP_PROBES] = 0,
- [NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
- [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
- [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
- [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
- [NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
- [NEIGH_VAR_PROXY_QLEN] = 0,
- [NEIGH_VAR_ANYCAST_DELAY] = 0,
- [NEIGH_VAR_PROXY_DELAY] = 0,
- [NEIGH_VAR_LOCKTIME] = 1 * HZ,
- },
- },
- .gc_interval = 30 * HZ,
- .gc_thresh1 = 128,
- .gc_thresh2 = 512,
- .gc_thresh3 = 1024,
-};
-
-static int dn_neigh_construct(struct neighbour *neigh)
-{
- struct net_device *dev = neigh->dev;
- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
- struct dn_dev *dn_db;
- struct neigh_parms *parms;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- parms = dn_db->neigh_parms;
- if (!parms) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- __neigh_parms_put(neigh->parms);
- neigh->parms = neigh_parms_clone(parms);
- rcu_read_unlock();
-
- neigh->ops = &dn_neigh_ops;
- neigh->nud_state = NUD_NOARP;
- neigh->output = neigh->ops->connected_output;
-
- if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT))
- memcpy(neigh->ha, dev->broadcast, dev->addr_len);
- else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK))
- dn_dn2eth(neigh->ha, dn->addr);
- else {
- net_dbg_ratelimited("Trying to create neigh for hw %d\n",
- dev->type);
- return -EINVAL;
- }
-
- /*
-	 * Make an estimate of the remote block size by assuming that it is
-	 * two less than the device MTU, which is true for Ethernet (and
-	 * other things which support long format headers) since there is
-	 * an extra length field (of 16 bits) which isn't part of the
-	 * Ethernet headers and which the DECnet specs won't admit is part
-	 * of the DECnet routing headers either.
-	 *
-	 * If we overestimate here it's no big deal: the NSP negotiations
-	 * will prevent us from sending packets which are too large for the
-	 * remote node to handle. In any case this figure is normally
-	 * updated by a hello message.
- */
- dn->blksize = dev->mtu - 2;
-
- return 0;
-}
-
-static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb)
-{
- printk(KERN_DEBUG "dn_neigh_error_report: called\n");
- kfree_skb(skb);
-}
-
-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct net_device *dev = neigh->dev;
- char mac_addr[ETH_ALEN];
- unsigned int seq;
- int err;
-
- dn_dn2eth(mac_addr, rt->rt_local_src);
- do {
- seq = read_seqbegin(&neigh->ha_lock);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, mac_addr, skb->len);
- } while (read_seqretry(&neigh->ha_lock, seq));
-
- if (err >= 0)
- err = dev_queue_xmit(skb);
- else {
- kfree_skb(skb);
- err = -EINVAL;
- }
- return err;
-}
-
-static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct neighbour *neigh = rt->n;
-
- return neigh->output(neigh, skb);
-}
-
-/*
- * For talking to broadcast devices: Ethernet & PPP
- */
-static int dn_long_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
- unsigned char *data;
- struct dn_long_packet *lp;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_long_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_long_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_long_packet) + 3);
- lp = (struct dn_long_packet *)(data+3);
-
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- *(data + 2) = 1 | DN_RT_F_PF; /* Padding */
-
- lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS));
- lp->d_area = lp->d_subarea = 0;
- dn_dn2eth(lp->d_id, cb->dst);
- lp->s_area = lp->s_subarea = 0;
- dn_dn2eth(lp->s_id, cb->src);
- lp->nl2 = 0;
- lp->visit_ct = cb->hops & 0x3f;
- lp->s_class = 0;
- lp->pt = 0;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-/*
- * For talking to pointopoint and multidrop devices: DDCMP and X.25
- */
-static int dn_short_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
- struct dn_short_packet *sp;
- unsigned char *data;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_short_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_short_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- sp = (struct dn_short_packet *)(data+2);
-
- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
- sp->dstnode = cb->dst;
- sp->srcnode = cb->src;
- sp->forward = cb->hops & 0x3f;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-/*
- * For talking to DECnet phase III nodes
- * Phase 3 output is the same as short output, except that
- * it clears the area bits before transmission.
- */
-static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
- struct dn_short_packet *sp;
- unsigned char *data;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_phase3_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_phase3_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- sp = (struct dn_short_packet *)(data + 2);
-
- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
- sp->dstnode = cb->dst & cpu_to_le16(0x03ff);
- sp->srcnode = cb->src & cpu_to_le16(0x03ff);
- sp->forward = cb->hops & 0x3f;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *neigh = rt->n;
- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
- struct dn_dev *dn_db;
- bool use_long;
-
- rcu_read_lock();
- dn_db = rcu_dereference(neigh->dev->dn_ptr);
- if (dn_db == NULL) {
- rcu_read_unlock();
- return -EINVAL;
- }
- use_long = dn_db->use_long;
- rcu_read_unlock();
-
- if (dn->flags & DN_NDFLAG_P3)
- return dn_phase3_output(neigh, sk, skb);
- if (use_long)
- return dn_long_output(neigh, sk, skb);
- else
- return dn_short_output(neigh, sk, skb);
-}
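
For orientation, the on-wire routing headers built by the three output paths above can be sketched as stand-alone C structs. Field names and ordering follow the assignments in this file; the struct names, stdint types and packing attribute are illustrative assumptions rather than the kernel's own definitions. Both formats follow a 2-byte little-endian length word, the long format adds a one-byte padding marker, and dn_phase3_output() reuses the short layout with the area bits of both node addresses cleared.

#include <stdint.h>
#include <stdio.h>

/* Illustrative layouts only; multi-byte fields are little-endian. */
struct dn_short_packet_sketch {     /* point-to-point and multidrop links */
    uint8_t  msgflg;                /* DN_RT_PKT_SHORT | routing flags    */
    uint16_t dstnode;               /* destination node address           */
    uint16_t srcnode;               /* source node address                */
    uint8_t  forward;               /* visit (hop) count, low 6 bits      */
} __attribute__((packed));

struct dn_long_packet_sketch {      /* broadcast media such as Ethernet   */
    uint8_t  msgflg;                /* DN_RT_PKT_LONG | routing flags     */
    uint8_t  d_area, d_subarea;     /* always zero in dn_long_output()    */
    uint8_t  d_id[6];               /* destination address in MAC form    */
    uint8_t  s_area, s_subarea;     /* always zero in dn_long_output()    */
    uint8_t  s_id[6];               /* source address in MAC form         */
    uint8_t  nl2;                   /* reserved                           */
    uint8_t  visit_ct;              /* hop count, low 6 bits              */
    uint8_t  s_class;               /* service class                      */
    uint8_t  pt;                    /* protocol type                      */
} __attribute__((packed));

int main(void)
{
    /* 6 and 21 bytes respectively when packed as above */
    printf("short header: %zu bytes, long header: %zu bytes\n",
           sizeof(struct dn_short_packet_sketch),
           sizeof(struct dn_long_packet_sketch));
    return 0;
}
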
-
-/*
- * Unfortunately, the neighbour code uses the device in its hash
- * function, so we don't get any advantage from it. This function
- * basically does a neigh_lookup(), but without comparing the device
- * field. This is required for the On-Ethernet cache
- */
-
-/*
- * Pointopoint link receives a hello message
- */
-void dn_neigh_pointopoint_hello(struct sk_buff *skb)
-{
- kfree_skb(skb);
-}
-
-/*
- * Ethernet router hello message received
- */
-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
-
- struct neighbour *neigh;
- struct dn_neigh *dn;
- struct dn_dev *dn_db;
- __le16 src;
-
- src = dn_eth2dn(msg->id);
-
- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
-
- dn = container_of(neigh, struct dn_neigh, n);
-
- if (neigh) {
- write_lock(&neigh->lock);
-
- neigh->used = jiffies;
- dn_db = rcu_dereference(neigh->dev->dn_ptr);
-
- if (!(neigh->nud_state & NUD_PERMANENT)) {
- neigh->updated = jiffies;
-
- if (neigh->dev->type == ARPHRD_ETHER)
- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
-
- dn->blksize = le16_to_cpu(msg->blksize);
- dn->priority = msg->priority;
-
- dn->flags &= ~DN_NDFLAG_P3;
-
- switch (msg->iinfo & DN_RT_INFO_TYPE) {
- case DN_RT_INFO_L1RT:
- dn->flags &=~DN_NDFLAG_R2;
- dn->flags |= DN_NDFLAG_R1;
- break;
- case DN_RT_INFO_L2RT:
- dn->flags |= DN_NDFLAG_R2;
- }
- }
-
- /* Only use routers in our area */
- if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) {
- if (!dn_db->router) {
- dn_db->router = neigh_clone(neigh);
- } else {
- if (msg->priority > container_of(dn_db->router,
- struct dn_neigh, n)->priority)
- neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
- }
- }
- write_unlock(&neigh->lock);
- neigh_release(neigh);
- }
-
- kfree_skb(skb);
- return 0;
-}
-
-/*
- * Endnode hello message received
- */
-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
- struct neighbour *neigh;
- struct dn_neigh *dn;
- __le16 src;
-
- src = dn_eth2dn(msg->id);
-
- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
-
- dn = container_of(neigh, struct dn_neigh, n);
-
- if (neigh) {
- write_lock(&neigh->lock);
-
- neigh->used = jiffies;
-
- if (!(neigh->nud_state & NUD_PERMANENT)) {
- neigh->updated = jiffies;
-
- if (neigh->dev->type == ARPHRD_ETHER)
- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
- dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2);
- dn->blksize = le16_to_cpu(msg->blksize);
- dn->priority = 0;
- }
-
- write_unlock(&neigh->lock);
- neigh_release(neigh);
- }
-
- kfree_skb(skb);
- return 0;
-}
-
-static char *dn_find_slot(char *base, int max, int priority)
-{
- int i;
- unsigned char *min = NULL;
-
- base += 6; /* skip first id */
-
- for(i = 0; i < max; i++) {
- if (!min || (*base < *min))
- min = base;
- base += 7; /* find next priority */
- }
-
- if (!min)
- return NULL;
-
- return (*min < priority) ? (min - 6) : NULL;
-}
-
-struct elist_cb_state {
- struct net_device *dev;
- unsigned char *ptr;
- unsigned char *rs;
- int t, n;
-};
-
-static void neigh_elist_cb(struct neighbour *neigh, void *_info)
-{
- struct elist_cb_state *s = _info;
- struct dn_neigh *dn;
-
- if (neigh->dev != s->dev)
- return;
-
- dn = container_of(neigh, struct dn_neigh, n);
- if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
- return;
-
- if (s->t == s->n)
- s->rs = dn_find_slot(s->ptr, s->n, dn->priority);
- else
- s->t++;
- if (s->rs == NULL)
- return;
-
- dn_dn2eth(s->rs, dn->addr);
- s->rs += 6;
- *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0;
- *(s->rs) |= dn->priority;
- s->rs++;
-}
-
-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
-{
- struct elist_cb_state state;
-
- state.dev = dev;
- state.t = 0;
- state.n = n;
- state.ptr = ptr;
- state.rs = ptr;
-
- neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state);
-
- return state.t;
-}
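
The buffer that dn_neigh_elist() fills is a sequence of 7-byte slots, as implied by dn_find_slot() and neigh_elist_cb() above. A small decoder sketch (the helper name and example address are hypothetical) shows how such a slot reads:

#include <stdint.h>
#include <stdio.h>

/* One 7-byte router-list slot: six bytes of the router's DECnet address in
 * Ethernet (MAC) form, then one byte whose top bit flags a connected
 * neighbour and whose low bits carry the router priority, matching what
 * neigh_elist_cb() writes.
 */
static void decode_router_slot(const uint8_t slot[7])
{
    int connected = (slot[6] & 0x80) != 0;
    int priority  = slot[6] & 0x7f;

    printf("router %02x:%02x:%02x:%02x:%02x:%02x connected=%d priority=%d\n",
           slot[0], slot[1], slot[2], slot[3], slot[4], slot[5],
           connected, priority);
}

int main(void)
{
    const uint8_t example[7] = { 0xaa, 0x00, 0x04, 0x00, 0x05, 0x04, 0xc0 };

    decode_router_slot(example);    /* connected, priority 64 */
    return 0;
}
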
-
-
-#ifdef CONFIG_PROC_FS
-
-static inline void dn_neigh_format_entry(struct seq_file *seq,
- struct neighbour *n)
-{
- struct dn_neigh *dn = container_of(n, struct dn_neigh, n);
- char buf[DN_ASCBUF_LEN];
-
- read_lock(&n->lock);
- seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n",
- dn_addr2asc(le16_to_cpu(dn->addr), buf),
- (dn->flags&DN_NDFLAG_R1) ? "1" : "-",
- (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
- (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
- dn->n.nud_state,
- refcount_read(&dn->n.refcnt),
- dn->blksize,
- (dn->n.dev) ? dn->n.dev->name : "?");
- read_unlock(&n->lock);
-}
-
-static int dn_neigh_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Addr Flags State Use Blksize Dev\n");
- } else {
- dn_neigh_format_entry(seq, v);
- }
-
- return 0;
-}
-
-static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return neigh_seq_start(seq, pos, &dn_neigh_table,
- NEIGH_SEQ_NEIGH_ONLY);
-}
-
-static const struct seq_operations dn_neigh_seq_ops = {
- .start = dn_neigh_seq_start,
- .next = neigh_seq_next,
- .stop = neigh_seq_stop,
- .show = dn_neigh_seq_show,
-};
-#endif
-
-void __init dn_neigh_init(void)
-{
- neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create_net("decnet_neigh", 0444, init_net.proc_net,
- &dn_neigh_seq_ops, sizeof(struct neigh_seq_state));
-}
-
-void __exit dn_neigh_cleanup(void)
-{
- remove_proc_entry("decnet_neigh", init_net.proc_net);
- neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table);
-}
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
deleted file mode 100644
index c59be5b04479..000000000000
--- a/net/decnet/dn_nsp_in.c
+++ /dev/null
@@ -1,907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Network Services Protocol (Input)
- *
- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- *
- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
- * original dn_nsp.c.
- * Steve Whitehouse: Updated to work with my new routing architecture.
- * Steve Whitehouse: Add changes from Eduardo Serrat's patches.
- * Steve Whitehouse: Put all ack handling code in a common routine.
- * Steve Whitehouse: Put other common bits into dn_nsp_rx()
- * Steve Whitehouse: More checks on skb->len to catch bogus packets
- * Fixed various race conditions and possible nasties.
- * Steve Whitehouse: Now handles returned conninit frames.
- * David S. Miller: New socket locking
- * Steve Whitehouse: Fixed lockup when socket filtering was enabled.
- * Paul Koning: Fix to push CC sockets into RUN when acks are
- * received.
- * Steve Whitehouse:
- * Patrick Caulfield: Checking conninits for correctness & sending of error
- * responses.
- * Steve Whitehouse: Added backlog congestion level return codes.
- * Patrick Caulfield:
- * Steve Whitehouse: Added flow control support (outbound)
- * Steve Whitehouse: Prepare for nonlinear skbs
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/filter.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/termios.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/netfilter_decnet.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-extern int decnet_log_martians;
-
-static void dn_log_martian(struct sk_buff *skb, const char *msg)
-{
- if (decnet_log_martians) {
- char *devname = skb->dev ? skb->dev->name : "???";
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- net_info_ratelimited("DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n",
- msg, devname,
- le16_to_cpu(cb->src),
- le16_to_cpu(cb->dst),
- le16_to_cpu(cb->src_port),
- le16_to_cpu(cb->dst_port));
- }
-}
-
-/*
- * For this function we've flipped the cross-subchannel bit
- * if the message is an otherdata or linkservice message. Thus
- * we can use it to work out what to update.
- */
-static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short type = ((ack >> 12) & 0x0003);
- int wakeup = 0;
-
- switch (type) {
- case 0: /* ACK - Data */
- if (dn_after(ack, scp->ackrcv_dat)) {
- scp->ackrcv_dat = ack & 0x0fff;
- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
- &scp->data_xmit_queue,
- ack);
- }
- break;
- case 1: /* NAK - Data */
- break;
- case 2: /* ACK - OtherData */
- if (dn_after(ack, scp->ackrcv_oth)) {
- scp->ackrcv_oth = ack & 0x0fff;
- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
- &scp->other_xmit_queue,
- ack);
- }
- break;
- case 3: /* NAK - OtherData */
- break;
- }
-
- if (wakeup && !sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-}
-
-/*
- * This function is a universal ack processor.
- */
-static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
-{
- __le16 *ptr = (__le16 *)skb->data;
- int len = 0;
- unsigned short ack;
-
- if (skb->len < 2)
- return len;
-
- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
- skb_pull(skb, 2);
- ptr++;
- len += 2;
- if ((ack & 0x4000) == 0) {
- if (oth)
- ack ^= 0x2000;
- dn_ack(sk, skb, ack);
- }
- }
-
- if (skb->len < 2)
- return len;
-
- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
- skb_pull(skb, 2);
- len += 2;
- if ((ack & 0x4000) == 0) {
- if (oth)
- ack ^= 0x2000;
- dn_ack(sk, skb, ack);
- }
- }
-
- return len;
-}
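
As a worked illustration of the acknowledgement words handled above: 0x8000 marks an ack field, 0x4000 marks a qualified ack that is consumed but not processed, 0x2000 is the cross-subchannel bit flipped for other-data messages, and the low 12 bits carry the sequence number. The decoder below is a hypothetical stand-alone sketch of that logic:

#include <stdint.h>
#include <stdio.h>

/* Decode one 16-bit ack word the way the receive path above does. For
 * otherdata/linkservice messages the caller flips the cross-subchannel
 * bit first, so the same switch can update the right queue.
 */
static void decode_ack_word(uint16_t ack, int other_data_msg)
{
    if (!(ack & 0x8000)) {          /* not an ack field at all            */
        printf("no ack present\n");
        return;
    }
    if (ack & 0x4000) {             /* qualified ack: consumed, not used  */
        printf("qualified ack, ignored\n");
        return;
    }
    if (other_data_msg)
        ack ^= 0x2000;              /* flip the cross-subchannel bit      */

    switch ((ack >> 12) & 0x3) {
    case 0: printf("ACK data,  seq %u\n", ack & 0x0fff); break;
    case 1: printf("NAK data,  seq %u\n", ack & 0x0fff); break;
    case 2: printf("ACK other, seq %u\n", ack & 0x0fff); break;
    case 3: printf("NAK other, seq %u\n", ack & 0x0fff); break;
    }
}

int main(void)
{
    decode_ack_word(0x8005, 0);     /* ACK of data segment 5              */
    decode_ack_word(0xa003, 0);     /* ACK on the other-data subchannel   */
    return 0;
}
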
-
-
-/**
- * dn_check_idf - Check an image data field format is correct.
- * @pptr: Pointer to pointer to image data
- * @len: Pointer to length of image data
- * @max: The maximum allowed length of the data in the image data field
- * @follow_on: Check that this many bytes exist beyond the end of the image data
- *
- * Returns: 0 if ok, -1 on error
- */
-static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on)
-{
- unsigned char *ptr = *pptr;
- unsigned char flen = *ptr++;
-
- (*len)--;
- if (flen > max)
- return -1;
- if ((flen + follow_on) > *len)
- return -1;
-
- *len -= flen;
- *pptr = ptr + flen;
- return 0;
-}
-
-/*
- * Table of reason codes to pass back to the node which sent us a badly
- * formed message, plus text messages for the log. A zero entry in
- * the reason field means "don't reply"; otherwise a disc init is sent with
- * the specified reason code.
- */
-static struct {
- unsigned short reason;
- const char *text;
-} ci_err_table[] = {
- { 0, "CI: Truncated message" },
- { NSP_REASON_ID, "CI: Destination username error" },
- { NSP_REASON_ID, "CI: Destination username type" },
- { NSP_REASON_US, "CI: Source username error" },
- { 0, "CI: Truncated at menuver" },
- { 0, "CI: Truncated before access or user data" },
- { NSP_REASON_IO, "CI: Access data format error" },
- { NSP_REASON_IO, "CI: User data format error" }
-};
-
-/*
- * This function uses a slightly different lookup method
- * to find its sockets, since it searches on object name/number
- * rather than port numbers. Various tests are done to ensure that
- * the incoming data is in the correct format before it is queued to
- * a socket.
- */
-static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data;
- struct sockaddr_dn dstaddr;
- struct sockaddr_dn srcaddr;
- unsigned char type = 0;
- int dstlen;
- int srclen;
- unsigned char *ptr;
- int len;
- int err = 0;
- unsigned char menuver;
-
- memset(&dstaddr, 0, sizeof(struct sockaddr_dn));
- memset(&srcaddr, 0, sizeof(struct sockaddr_dn));
-
- /*
- * 1. Decode & remove message header
- */
- cb->src_port = msg->srcaddr;
- cb->dst_port = msg->dstaddr;
- cb->services = msg->services;
- cb->info = msg->info;
- cb->segsize = le16_to_cpu(msg->segsize);
-
- if (!pskb_may_pull(skb, sizeof(*msg)))
- goto err_out;
-
- skb_pull(skb, sizeof(*msg));
-
- len = skb->len;
- ptr = skb->data;
-
- /*
- * 2. Check destination end username format
- */
- dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type);
- err++;
- if (dstlen < 0)
- goto err_out;
-
- err++;
- if (type > 1)
- goto err_out;
-
- len -= dstlen;
- ptr += dstlen;
-
- /*
- * 3. Check source end username format
- */
- srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type);
- err++;
- if (srclen < 0)
- goto err_out;
-
- len -= srclen;
- ptr += srclen;
- err++;
- if (len < 1)
- goto err_out;
-
- menuver = *ptr;
- ptr++;
- len--;
-
- /*
- * 4. Check that optional data actually exists if menuver says it does
- */
- err++;
- if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1))
- goto err_out;
-
- /*
- * 5. Check optional access data format
- */
- err++;
- if (menuver & DN_MENUVER_ACC) {
- if (dn_check_idf(&ptr, &len, 39, 1))
- goto err_out;
- if (dn_check_idf(&ptr, &len, 39, 1))
- goto err_out;
- if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0))
- goto err_out;
- }
-
- /*
- * 6. Check optional user data format
- */
- err++;
- if (menuver & DN_MENUVER_USR) {
- if (dn_check_idf(&ptr, &len, 16, 0))
- goto err_out;
- }
-
- /*
- * 7. Look up socket based on destination end username
- */
- return dn_sklist_find_listener(&dstaddr);
-err_out:
- dn_log_martian(skb, ci_err_table[err].text);
- *reason = ci_err_table[err].reason;
- return NULL;
-}
-
-
-static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
-{
- if (sk_acceptq_is_full(sk)) {
- kfree_skb(skb);
- return;
- }
-
- sk_acceptq_added(sk);
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_state_change(sk);
-}
-
-static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
- unsigned char *ptr;
-
- if (skb->len < 4)
- goto out;
-
- ptr = skb->data;
- cb->services = *ptr++;
- cb->info = *ptr++;
- cb->segsize = le16_to_cpu(*(__le16 *)ptr);
-
- if ((scp->state == DN_CI) || (scp->state == DN_CD)) {
- scp->persist = 0;
- scp->addrrem = cb->src_port;
- sk->sk_state = TCP_ESTABLISHED;
- scp->state = DN_RUN;
- scp->services_rem = cb->services;
- scp->info_rem = cb->info;
- scp->segsize_rem = cb->segsize;
-
- if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
- scp->max_window = decnet_no_fc_max_cwnd;
-
- if (skb->len > 0) {
- u16 dlen = *skb->data;
- if ((dlen <= 16) && (dlen <= skb->len)) {
- scp->conndata_in.opt_optl = cpu_to_le16(dlen);
- skb_copy_from_linear_data_offset(skb, 1,
- scp->conndata_in.opt_data, dlen);
- }
- }
- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
-out:
- kfree_skb(skb);
-}
-
-static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI) {
- scp->state = DN_CD;
- scp->persist = 0;
- }
-
- kfree_skb(skb);
-}
-
-static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned short reason;
-
- if (skb->len < 2)
- goto out;
-
- reason = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- scp->discdata_in.opt_status = cpu_to_le16(reason);
- scp->discdata_in.opt_optl = 0;
- memset(scp->discdata_in.opt_data, 0, 16);
-
- if (skb->len > 0) {
- u16 dlen = *skb->data;
- if ((dlen <= 16) && (dlen <= skb->len)) {
- scp->discdata_in.opt_optl = cpu_to_le16(dlen);
- skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
- }
- }
-
- scp->addrrem = cb->src_port;
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_CI:
- case DN_CD:
- scp->state = DN_RJ;
- sk->sk_err = ECONNREFUSED;
- break;
- case DN_RUN:
- sk->sk_shutdown |= SHUTDOWN_MASK;
- scp->state = DN_DN;
- break;
- case DN_DI:
- scp->state = DN_DIC;
- break;
- }
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- sk->sk_state_change(sk);
- }
-
- /*
- * It appears that it's possible for remote machines to send disc
- * init messages with no port identifier if we are in the CI and
- * possibly also the CD state. Obviously we shouldn't reply with
- * a message if we don't know what the end point is.
- */
- if (scp->addrrem) {
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC);
- }
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
-
-out:
- kfree_skb(skb);
-}
-
-/*
- * disc_conf messages are also called no_resources or no_link
- * messages depending upon the "reason" field.
- */
-static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short reason;
-
- if (skb->len != 2)
- goto out;
-
- reason = le16_to_cpu(*(__le16 *)skb->data);
-
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_CI:
- scp->state = DN_NR;
- break;
- case DN_DR:
- if (reason == NSP_REASON_DC)
- scp->state = DN_DRC;
- if (reason == NSP_REASON_NL)
- scp->state = DN_CN;
- break;
- case DN_DI:
- scp->state = DN_DIC;
- break;
- case DN_RUN:
- sk->sk_shutdown |= SHUTDOWN_MASK;
- fallthrough;
- case DN_CC:
- scp->state = DN_CN;
- }
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- sk->sk_state_change(sk);
- }
-
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
-
-out:
- kfree_skb(skb);
-}
-
-static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short segnum;
- unsigned char lsflags;
- signed char fcval;
- int wake_up = 0;
- char *ptr = skb->data;
- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
-
- if (skb->len != 4)
- goto out;
-
- segnum = le16_to_cpu(*(__le16 *)ptr);
- ptr += 2;
- lsflags = *(unsigned char *)ptr++;
- fcval = *ptr;
-
- /*
- * Here we ignore erroneous packets which really should
- * cause a connection abort. It is not critical
- * for now though.
- */
- if (lsflags & 0xf8)
- goto out;
-
- if (seq_next(scp->numoth_rcv, segnum)) {
- seq_add(&scp->numoth_rcv, 1);
- switch(lsflags & 0x04) { /* FCVAL INT */
- case 0x00: /* Normal Request */
- switch(lsflags & 0x03) { /* FCVAL MOD */
- case 0x00: /* Request count */
- if (fcval < 0) {
- unsigned char p_fcval = -fcval;
- if ((scp->flowrem_dat > p_fcval) &&
- (fctype == NSP_FC_SCMC)) {
- scp->flowrem_dat -= p_fcval;
- }
- } else if (fcval > 0) {
- scp->flowrem_dat += fcval;
- wake_up = 1;
- }
- break;
- case 0x01: /* Stop outgoing data */
- scp->flowrem_sw = DN_DONTSEND;
- break;
- case 0x02: /* Ok to start again */
- scp->flowrem_sw = DN_SEND;
- dn_nsp_output(sk);
- wake_up = 1;
- }
- break;
- case 0x04: /* Interrupt Request */
- if (fcval > 0) {
- scp->flowrem_oth += fcval;
- wake_up = 1;
- }
- break;
- }
- if (wake_up && !sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
- dn_nsp_send_oth_ack(sk);
-
-out:
- kfree_skb(skb);
-}
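
A stand-alone sketch of the link-service flag decisions taken above; the struct, helper name and strings are hypothetical, but the bit tests mirror dn_nsp_linkservice():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical view of the 4-byte link-service body parsed above: a
 * little-endian sequence number on the other-data subchannel, a flags
 * byte (bits 7..3 must be zero) and a signed flow-control value.
 */
struct ls_body {
    uint16_t segnum;
    uint8_t  lsflags;
    int8_t   fcval;
};

/* Text rendering of the lsflags decisions made in the code above. */
static const char *ls_describe(uint8_t lsflags)
{
    if (lsflags & 0xf8)
        return "reserved bits set: message is dropped";
    if (lsflags & 0x04)
        return "interrupt request: credit the other-data flow count";
    switch (lsflags & 0x03) {
    case 0x00: return "data request: adjust the data flow count by fcval";
    case 0x01: return "stop: peer asks us to stop sending data";
    case 0x02: return "start: peer allows data transmission again";
    default:   return "FCVAL modifier 3: no action taken above";
    }
}

int main(void)
{
    printf("0x02 -> %s\n", ls_describe(0x02));
    return 0;
}
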
-
-/*
- * Copy of sock_queue_rcv_skb (from sock.h) without
- * bh_lock_sock() (it's already held when this is called) which
- * also allows data and other data to be queued to a socket.
- */
-static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue)
-{
- int err;
-
- /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
- the number of warnings when compiling with -W --ANK
- */
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf) {
- err = -ENOMEM;
- goto out;
- }
-
- err = sk_filter(sk, skb);
- if (err)
- goto out;
-
- skb_set_owner_r(skb, sk);
- skb_queue_tail(queue, skb);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
-out:
- return err;
-}
-
-static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short segnum;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int queued = 0;
-
- if (skb->len < 2)
- goto out;
-
- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- if (seq_next(scp->numoth_rcv, segnum)) {
-
- if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) {
- seq_add(&scp->numoth_rcv, 1);
- scp->other_report = 0;
- queued = 1;
- }
- }
-
- dn_nsp_send_oth_ack(sk);
-out:
- if (!queued)
- kfree_skb(skb);
-}
-
-static void dn_nsp_data(struct sock *sk, struct sk_buff *skb)
-{
- int queued = 0;
- unsigned short segnum;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
-
- if (skb->len < 2)
- goto out;
-
- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- if (seq_next(scp->numdat_rcv, segnum)) {
- if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) {
- seq_add(&scp->numdat_rcv, 1);
- queued = 1;
- }
-
- if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) {
- scp->flowloc_sw = DN_DONTSEND;
- dn_nsp_send_link(sk, DN_DONTSEND, 0);
- }
- }
-
- dn_nsp_send_data_ack(sk);
-out:
- if (!queued)
- kfree_skb(skb);
-}
-
-/*
- * If one of our conninit messages is returned, this function
- * deals with it. It puts the socket into the NO_COMMUNICATION
- * state.
- */
-static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI) {
- scp->state = DN_NC;
- sk->sk_state = TCP_CLOSE;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
- kfree_skb(skb);
-}
-
-static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int ret = NET_RX_DROP;
-
- /* Must not reply to returned packets */
- if (cb->rt_flags & DN_RT_F_RTS)
- goto out;
-
- if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) {
- switch (cb->nsp_flags & 0x70) {
- case 0x10:
- case 0x60: /* (Retransmitted) Connect Init */
- dn_nsp_return_disc(skb, NSP_DISCINIT, reason);
- ret = NET_RX_SUCCESS;
- break;
- case 0x20: /* Connect Confirm */
- dn_nsp_return_disc(skb, NSP_DISCCONF, reason);
- ret = NET_RX_SUCCESS;
- break;
- }
- }
-
-out:
- kfree_skb(skb);
- return ret;
-}
-
-static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
- struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sock *sk = NULL;
- unsigned char *ptr = (unsigned char *)skb->data;
- unsigned short reason = NSP_REASON_NL;
-
- if (!pskb_may_pull(skb, 2))
- goto free_out;
-
- skb_reset_transport_header(skb);
- cb->nsp_flags = *ptr++;
-
- if (decnet_debug_level & 2)
- printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags);
-
- if (cb->nsp_flags & 0x83)
- goto free_out;
-
- /*
- * Filter out conninits and useless packet types
- */
- if ((cb->nsp_flags & 0x0c) == 0x08) {
- switch (cb->nsp_flags & 0x70) {
- case 0x00: /* NOP */
- case 0x70: /* Reserved */
- case 0x50: /* Reserved, Phase II node init */
- goto free_out;
- case 0x10:
- case 0x60:
- if (unlikely(cb->rt_flags & DN_RT_F_RTS))
- goto free_out;
- sk = dn_find_listener(skb, &reason);
- goto got_it;
- }
- }
-
- if (!pskb_may_pull(skb, 3))
- goto free_out;
-
- /*
- * Grab the destination address.
- */
- cb->dst_port = *(__le16 *)ptr;
- cb->src_port = 0;
- ptr += 2;
-
- /*
- * If not a connack, grab the source address too.
- */
- if (pskb_may_pull(skb, 5)) {
- cb->src_port = *(__le16 *)ptr;
- ptr += 2;
- skb_pull(skb, 5);
- }
-
- /*
- * Returned packets...
- * Swap src & dst and look up in the normal way.
- */
- if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
- swap(cb->dst_port, cb->src_port);
- swap(cb->dst, cb->src);
- }
-
- /*
- * Find the socket to which this skb is destined.
- */
- sk = dn_find_by_skb(skb);
-got_it:
- if (sk != NULL) {
- struct dn_scp *scp = DN_SK(sk);
-
- /* Reset backoff */
- scp->nsp_rxtshift = 0;
-
- /*
- * We linearize everything except data segments here.
- */
- if (cb->nsp_flags & ~0x60) {
- if (unlikely(skb_linearize(skb)))
- goto free_out;
- }
-
- return sk_receive_skb(sk, skb, 0);
- }
-
- return dn_nsp_no_socket(skb, reason);
-
-free_out:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-int dn_nsp_rx(struct sk_buff *skb)
-{
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_nsp_rx_packet);
-}
-
-/*
- * This is the main receive routine for sockets. It is called
- * from the above when the socket is not busy, and also from
- * sock_release() when there is a backlog queued up.
- */
-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- if (cb->rt_flags & DN_RT_F_RTS) {
- if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68)
- dn_returned_conn_init(sk, skb);
- else
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
-
- /*
- * Control packet.
- */
- if ((cb->nsp_flags & 0x0c) == 0x08) {
- switch (cb->nsp_flags & 0x70) {
- case 0x10:
- case 0x60:
- dn_nsp_conn_init(sk, skb);
- break;
- case 0x20:
- dn_nsp_conn_conf(sk, skb);
- break;
- case 0x30:
- dn_nsp_disc_init(sk, skb);
- break;
- case 0x40:
- dn_nsp_disc_conf(sk, skb);
- break;
- }
-
- } else if (cb->nsp_flags == 0x24) {
- /*
- * Special for connacks, 'cos they don't have
- * ack data or ack otherdata info.
- */
- dn_nsp_conn_ack(sk, skb);
- } else {
- int other = 1;
-
- /* both data and ack frames can kick a CC socket into RUN */
- if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) {
- scp->state = DN_RUN;
- sk->sk_state = TCP_ESTABLISHED;
- sk->sk_state_change(sk);
- }
-
- if ((cb->nsp_flags & 0x1c) == 0)
- other = 0;
- if (cb->nsp_flags == 0x04)
- other = 0;
-
- /*
- * Read out ack data here, this applies equally
- * to data, other data, link service and both
- * ack data and ack otherdata.
- */
- dn_process_ack(sk, skb, other);
-
- /*
- * If we've some sort of data here then call a
- * suitable routine for dealing with it, otherwise
- * the packet is an ack and can be discarded.
- */
- if ((cb->nsp_flags & 0x0c) == 0) {
-
- if (scp->state != DN_RUN)
- goto free_out;
-
- switch (cb->nsp_flags) {
- case 0x10: /* LS */
- dn_nsp_linkservice(sk, skb);
- break;
- case 0x30: /* OD */
- dn_nsp_otherdata(sk, skb);
- break;
- default:
- dn_nsp_data(sk, skb);
- }
-
- } else { /* Ack, chuck it out here */
-free_out:
- kfree_skb(skb);
- }
- }
-
- return NET_RX_SUCCESS;
-}
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
deleted file mode 100644
index b05639bdfc8f..000000000000
--- a/net/decnet/dn_nsp_out.c
+++ /dev/null
@@ -1,696 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Network Services Protocol (Output)
- *
- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- *
- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
- * original dn_nsp.c.
- * Steve Whitehouse: Updated to work with my new routing architecture.
- * Steve Whitehouse: Added changes from Eduardo Serrat's patches.
- * Steve Whitehouse: Now conninits have the "return" bit set.
- * Steve Whitehouse: Fixes to check alloc'd skbs are non NULL!
- * Moved output state machine into one function
- * Steve Whitehouse: New output state machine
- * Paul Koning: Connect Confirm message fix.
- * Eduardo Serrat: Fix to stop dn_nsp_do_disc() sending malformed packets.
- * Steve Whitehouse: dn_nsp_output() and friends needed a spring clean
- * Steve Whitehouse: Moved dn_nsp_send() in here from route.h
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/termios.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/if_packet.h>
-#include <linux/jiffies.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-
-static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
-
-static void dn_nsp_send(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct dst_entry *dst;
- struct flowidn fld;
-
- skb_reset_transport_header(skb);
- scp->stamp = jiffies;
-
- dst = sk_dst_check(sk, 0);
- if (dst) {
-try_again:
- skb_dst_set(skb, dst);
- dst_output(&init_net, skb->sk, skb);
- return;
- }
-
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_oif = sk->sk_bound_dev_if;
- fld.saddr = dn_saddr2dn(&scp->addr);
- fld.daddr = dn_saddr2dn(&scp->peer);
- dn_sk_ports_copy(&fld, scp);
- fld.flowidn_proto = DNPROTO_NSP;
- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) {
- dst = sk_dst_get(sk);
- sk->sk_route_caps = dst->dev->features;
- goto try_again;
- }
-
- sk->sk_err = EHOSTUNREACH;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-}
-
-
-/*
- * If sk == NULL, then we assume that we are supposed to be making
- * a routing layer skb. If sk != NULL, then we are supposed to be
- * creating an skb for the NSP layer.
- *
- * The eventual aim is for each socket to have a cached header size
- * for its outgoing packets, and to set hdr from this when sk != NULL.
- */
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
-{
- struct sk_buff *skb;
- int hdr = 64;
-
- if ((skb = alloc_skb(size + hdr, pri)) == NULL)
- return NULL;
-
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->pkt_type = PACKET_OUTGOING;
-
- if (sk)
- skb_set_owner_w(skb, sk);
-
- skb_reserve(skb, hdr);
-
- return skb;
-}
-
-/*
- * Calculate persist timer based upon the smoothed round
- * trip time and the variance. Backoff according to the
- * nsp_backoff[] array.
- */
-unsigned long dn_nsp_persist(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
-
- t *= nsp_backoff[scp->nsp_rxtshift];
-
- if (t < HZ) t = HZ;
- if (t > (600*HZ)) t = (600*HZ);
-
- if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT)
- scp->nsp_rxtshift++;
-
- /* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */
-
- return t;
-}
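
The persist-timer arithmetic above can be exercised on its own; HZ and NSP_MAXRXTSHIFT are assumed values for this sketch (the backoff table is copied from the top of this file):

#include <stdio.h>

#define HZ 100                      /* assumed tick rate for this example  */
#define NSP_MAXRXTSHIFT 12          /* matches the 13-entry table above    */

static const int nsp_backoff[NSP_MAXRXTSHIFT + 1] = {
    1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64
};

/* Same arithmetic as dn_nsp_persist(): base timeout (srtt/4 + rttvar)/2,
 * scaled by the backoff table and clamped to [1s, 600s]. srtt and rttvar
 * are the scaled fixed-point values maintained by dn_nsp_rtt() below.
 */
static unsigned long persist_timeout(unsigned long srtt, unsigned long rttvar,
                                     int rxtshift)
{
    unsigned long t = ((srtt >> 2) + rttvar) >> 1;

    t *= nsp_backoff[rxtshift];
    if (t < HZ)
        t = HZ;
    if (t > 600 * HZ)
        t = 600 * HZ;
    return t;
}

int main(void)
{
    for (int shift = 0; shift <= 4; shift++)
        printf("rxtshift %d -> %lu jiffies\n",
               shift, persist_timeout(800, 100, shift));
    return 0;
}
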
-
-/*
- * This is called each time we get an estimate for the rtt
- * on the link.
- */
-static void dn_nsp_rtt(struct sock *sk, long rtt)
-{
- struct dn_scp *scp = DN_SK(sk);
- long srtt = (long)scp->nsp_srtt;
- long rttvar = (long)scp->nsp_rttvar;
- long delta;
-
- /*
- * If the jiffies clock flips over in the middle of timestamp
- * gathering this value might turn out negative, so we make sure
- * that it is always positive here.
- */
- if (rtt < 0)
- rtt = -rtt;
- /*
- * Add new rtt to smoothed average
- */
- delta = ((rtt << 3) - srtt);
- srtt += (delta >> 3);
- if (srtt >= 1)
- scp->nsp_srtt = (unsigned long)srtt;
- else
- scp->nsp_srtt = 1;
-
- /*
- * Add new rtt variance to smoothed variance
- */
- delta >>= 1;
- rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
- if (rttvar >= 1)
- scp->nsp_rttvar = (unsigned long)rttvar;
- else
- scp->nsp_rttvar = 1;
-
- /* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */
-}
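
A minimal stand-alone sketch of one smoothing step, assuming the same fixed-point scaling as above (srtt held at roughly eight times the mean RTT):

#include <stdio.h>

/* One update step of the estimator above, with the state passed in by
 * pointer: srtt gains 1/8 of each error, rttvar tracks the deviation and
 * gains 1/4 of its error; both are clamped to a minimum of 1.
 */
static void rtt_update(long *srtt, long *rttvar, long rtt)
{
    long delta;

    if (rtt < 0)
        rtt = -rtt;                 /* guard against jiffies wrap-around   */

    delta = (rtt << 3) - *srtt;     /* error, scaled by 8                  */
    *srtt += delta >> 3;
    if (*srtt < 1)
        *srtt = 1;

    delta >>= 1;
    *rttvar += ((delta > 0 ? delta : -delta) - *rttvar) >> 2;
    if (*rttvar < 1)
        *rttvar = 1;
}

int main(void)
{
    long srtt = 1, rttvar = 1;

    for (int i = 0; i < 5; i++) {
        rtt_update(&srtt, &rttvar, 100 /* measured RTT in jiffies */);
        printf("sample %d: srtt=%ld rttvar=%ld\n", i, srtt, rttvar);
    }
    return 0;
}
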
-
-/**
- * dn_nsp_clone_and_send - Send a data packet by cloning it
- * @skb: The packet to clone and transmit
- * @gfp: memory allocation flag
- *
- * Clone a queued data or other data packet and transmit it.
- *
- * Returns: The number of times the packet has been sent previously
- */
-static inline unsigned int dn_nsp_clone_and_send(struct sk_buff *skb,
- gfp_t gfp)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sk_buff *skb2;
- int ret = 0;
-
- if ((skb2 = skb_clone(skb, gfp)) != NULL) {
- ret = cb->xmit_count;
- cb->xmit_count++;
- cb->stamp = jiffies;
- skb2->sk = skb->sk;
- dn_nsp_send(skb2);
- }
-
- return ret;
-}
-
-/**
- * dn_nsp_output - Try and send something from socket queues
- * @sk: The socket whose queues are to be investigated
- *
- * Try and send the packet on the end of the data and other data queues.
- * Other data gets priority over data, and if we retransmit a packet we
- * reduce the window by dividing it in two.
- *
- */
-void dn_nsp_output(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb;
- unsigned int reduce_win = 0;
-
- /*
- * First we check for otherdata/linkservice messages
- */
- if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL)
- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
-
- /*
- * If we may not send any data, we don't.
- * If we are still trying to get some other data down the
- * channel, we don't try and send any data.
- */
- if (reduce_win || (scp->flowrem_sw != DN_SEND))
- goto recalc_window;
-
- if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL)
- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
-
- /*
- * If we've sent any frame more than once, we cut the
- * send window size in half. There is always a minimum
- * window size of one available.
- */
-recalc_window:
- if (reduce_win) {
- scp->snd_window >>= 1;
- if (scp->snd_window < NSP_MIN_WINDOW)
- scp->snd_window = NSP_MIN_WINDOW;
- }
-}
-
-int dn_nsp_xmit_timeout(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- dn_nsp_output(sk);
-
- if (!skb_queue_empty(&scp->data_xmit_queue) ||
- !skb_queue_empty(&scp->other_xmit_queue))
- scp->persist = dn_nsp_persist(sk);
-
- return 0;
-}
-
-static inline __le16 *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len)
-{
- unsigned char *ptr = skb_push(skb, len);
-
- BUG_ON(len < 5);
-
- *ptr++ = msgflag;
- *((__le16 *)ptr) = scp->addrrem;
- ptr += 2;
- *((__le16 *)ptr) = scp->addrloc;
- ptr += 2;
- return (__le16 __force *)ptr;
-}
-
-static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short acknum = scp->numdat_rcv & 0x0FFF;
- unsigned short ackcrs = scp->numoth_rcv & 0x0FFF;
- __le16 *ptr;
-
- BUG_ON(hlen < 9);
-
- scp->ackxmt_dat = acknum;
- scp->ackxmt_oth = ackcrs;
- acknum |= 0x8000;
- ackcrs |= 0x8000;
-
- /* If this is an "other data/ack" message, swap acknum and ackcrs */
- if (other)
- swap(acknum, ackcrs);
-
- /* Set "cross subchannel" bit in ackcrs */
- ackcrs |= 0x2000;
-
- ptr = dn_mk_common_header(scp, skb, msgflag, hlen);
-
- *ptr++ = cpu_to_le16(acknum);
- *ptr++ = cpu_to_le16(ackcrs);
-
- return ptr;
-}
-
-static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- __le16 *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth);
-
- if (unlikely(oth)) {
- cb->segnum = scp->numoth;
- seq_add(&scp->numoth, 1);
- } else {
- cb->segnum = scp->numdat;
- seq_add(&scp->numdat, 1);
- }
- *(ptr++) = cpu_to_le16(cb->segnum);
-
- return ptr;
-}
-
-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb,
- gfp_t gfp, int oth)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
-
- cb->xmit_count = 0;
- dn_nsp_mk_data_header(sk, skb, oth);
-
- /*
- * Slow start: If we have been idle for more than
- * one RTT, then reset window to min size.
- */
- if (time_is_before_jiffies(scp->stamp + t))
- scp->snd_window = NSP_MIN_WINDOW;
-
- if (oth)
- skb_queue_tail(&scp->other_xmit_queue, skb);
- else
- skb_queue_tail(&scp->data_xmit_queue, skb);
-
- if (scp->flowrem_sw != DN_SEND)
- return;
-
- dn_nsp_clone_and_send(skb, gfp);
-}
-
-
-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb2, *n, *ack = NULL;
- int wakeup = 0;
- int try_retrans = 0;
- unsigned long reftime = cb->stamp;
- unsigned long pkttime;
- unsigned short xmit_count;
- unsigned short segnum;
-
- skb_queue_walk_safe(q, skb2, n) {
- struct dn_skb_cb *cb2 = DN_SKB_CB(skb2);
-
- if (dn_before_or_equal(cb2->segnum, acknum))
- ack = skb2;
-
- /* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */
-
- if (ack == NULL)
- continue;
-
- /* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */
-
- /* Does _last_ packet acked have xmit_count > 1 */
- try_retrans = 0;
- /* Remember to wake up the sending process */
- wakeup = 1;
- /* Keep various statistics */
- pkttime = cb2->stamp;
- xmit_count = cb2->xmit_count;
- segnum = cb2->segnum;
- /* Remove and drop ack'ed packet */
- skb_unlink(ack, q);
- kfree_skb(ack);
- ack = NULL;
-
- /*
- * We don't expect to see acknowledgements for packets we
- * haven't sent yet.
- */
- WARN_ON(xmit_count == 0);
-
- /*
- * If the packet has only been sent once, we can use it
- * to calculate the RTT and also open the window a little
- * further.
- */
- if (xmit_count == 1) {
- if (dn_equal(segnum, acknum))
- dn_nsp_rtt(sk, (long)(pkttime - reftime));
-
- if (scp->snd_window < scp->max_window)
- scp->snd_window++;
- }
-
- /*
- * Packet has been sent more than once. If this is the last
- * packet to be acknowledged then we want to send the next
- * packet in the send queue again (assumes the remote host does
- * go-back-N error control).
- */
- if (xmit_count > 1)
- try_retrans = 1;
- }
-
- if (try_retrans)
- dn_nsp_output(sk);
-
- return wakeup;
-}
-
-void dn_nsp_send_data_ack(struct sock *sk)
-{
- struct sk_buff *skb = NULL;
-
- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
- return;
-
- skb_reserve(skb, 9);
- dn_mk_ack_header(sk, skb, 0x04, 9, 0);
- dn_nsp_send(skb);
-}
-
-void dn_nsp_send_oth_ack(struct sock *sk)
-{
- struct sk_buff *skb = NULL;
-
- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
- return;
-
- skb_reserve(skb, 9);
- dn_mk_ack_header(sk, skb, 0x14, 9, 1);
- dn_nsp_send(skb);
-}
-
-
-void dn_send_conn_ack (struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb = NULL;
- struct nsp_conn_ack_msg *msg;
-
- if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL)
- return;
-
- msg = skb_put(skb, 3);
- msg->msgflg = 0x24;
- msg->dstaddr = scp->addrrem;
-
- dn_nsp_send(skb);
-}
-
-static int dn_nsp_retrans_conn_conf(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CC)
- dn_send_conn_conf(sk, GFP_ATOMIC);
-
- return 0;
-}
-
-void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb = NULL;
- struct nsp_conn_init_msg *msg;
- __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
-
- if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
- return;
-
- msg = skb_put(skb, sizeof(*msg));
- msg->msgflg = 0x28;
- msg->dstaddr = scp->addrrem;
- msg->srcaddr = scp->addrloc;
- msg->services = scp->services_loc;
- msg->info = scp->info_loc;
- msg->segsize = cpu_to_le16(scp->segsize_loc);
-
- skb_put_u8(skb, len);
-
- if (len > 0)
- skb_put_data(skb, scp->conndata_out.opt_data, len);
-
-
- dn_nsp_send(skb);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_retrans_conn_conf;
-}
-
-
-static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
- unsigned short reason, gfp_t gfp,
- struct dst_entry *dst,
- int ddl, unsigned char *dd, __le16 rem, __le16 loc)
-{
- struct sk_buff *skb = NULL;
- int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0);
- unsigned char *msg;
-
- if ((dst == NULL) || (rem == 0)) {
- net_dbg_ratelimited("DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n",
- le16_to_cpu(rem), dst);
- return;
- }
-
- if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL)
- return;
-
- msg = skb_put(skb, size);
- *msg++ = msgflg;
- *(__le16 *)msg = rem;
- msg += 2;
- *(__le16 *)msg = loc;
- msg += 2;
- *(__le16 *)msg = cpu_to_le16(reason);
- msg += 2;
- if (msgflg == NSP_DISCINIT)
- *msg++ = ddl;
-
- if (ddl) {
- memcpy(msg, dd, ddl);
- }
-
- /*
- * This doesn't go via the dn_nsp_send() function since we need
- * to be able to send disc packets out which have no socket
- * associations.
- */
- skb_dst_set(skb, dst_clone(dst));
- dst_output(&init_net, skb->sk, skb);
-}
-
-
-void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg,
- unsigned short reason, gfp_t gfp)
-{
- struct dn_scp *scp = DN_SK(sk);
- int ddl = 0;
-
- if (msgflg == NSP_DISCINIT)
- ddl = le16_to_cpu(scp->discdata_out.opt_optl);
-
- if (reason == 0)
- reason = le16_to_cpu(scp->discdata_out.opt_status);
-
- dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl,
- scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
-}
-
-
-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg,
- unsigned short reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int ddl = 0;
- gfp_t gfp = GFP_ATOMIC;
-
- dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl,
- NULL, cb->src_port, cb->dst_port);
-}
-
-
-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb;
- unsigned char *ptr;
- gfp_t gfp = GFP_ATOMIC;
-
- if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL)
- return;
-
- skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
- ptr = skb_put(skb, 2);
- DN_SKB_CB(skb)->nsp_flags = 0x10;
- *ptr++ = lsflags;
- *ptr = fcval;
-
- dn_nsp_queue_xmit(sk, skb, gfp, 1);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_xmit_timeout;
-}
-
-static int dn_nsp_retrans_conninit(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI)
- dn_nsp_send_conninit(sk, NSP_RCI);
-
- return 0;
-}
-
-void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct nsp_conn_init_msg *msg;
- unsigned char aux;
- unsigned char menuver;
- struct dn_skb_cb *cb;
- unsigned char type = 1;
- gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC;
- struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation);
-
- if (!skb)
- return;
-
- cb = DN_SKB_CB(skb);
- msg = skb_put(skb, sizeof(*msg));
-
- msg->msgflg = msgflg;
- msg->dstaddr = 0x0000; /* Remote Node will assign it*/
-
- msg->srcaddr = scp->addrloc;
- msg->services = scp->services_loc; /* Requested flow control */
- msg->info = scp->info_loc; /* Version Number */
- msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */
-
- if (scp->peer.sdn_objnum)
- type = 0;
-
- skb_put(skb, dn_sockaddr2username(&scp->peer,
- skb_tail_pointer(skb), type));
- skb_put(skb, dn_sockaddr2username(&scp->addr,
- skb_tail_pointer(skb), 2));
-
- menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
- if (scp->peer.sdn_flags & SDF_PROXY)
- menuver |= DN_MENUVER_PRX;
- if (scp->peer.sdn_flags & SDF_UICPROXY)
- menuver |= DN_MENUVER_UIC;
-
- skb_put_u8(skb, menuver); /* Menu Version */
-
- aux = scp->accessdata.acc_userl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_user, aux);
-
- aux = scp->accessdata.acc_passl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_pass, aux);
-
- aux = scp->accessdata.acc_accl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_acc, aux);
-
- aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->conndata_out.opt_data, aux);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_retrans_conninit;
-
- cb->rt_flags = DN_RT_F_RQR;
-
- dn_nsp_send(skb);
-}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
deleted file mode 100644
index ac2ee1689111..000000000000
--- a/net/decnet/dn_route.c
+++ /dev/null
@@ -1,1922 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Functions (Endnode and Router)
- *
- * Authors: Steve Whitehouse <SteveW@ACM.org>
- * Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- * Steve Whitehouse : Fixes to allow "intra-ethernet" and
- * "return-to-sender" bits on outgoing
- * packets.
- * Steve Whitehouse : Timeouts for cached routes.
- * Steve Whitehouse : Use dst cache for input routes too.
- * Steve Whitehouse : Fixed error values in dn_send_skb.
- * Steve Whitehouse : Rework routing functions to better fit
- * DECnet routing design
- * Alexey Kuznetsov : New SMP locking
- * Steve Whitehouse : More SMP locking changes & dn_cache_dump()
- * Steve Whitehouse : Prerouting NF hook, now really is prerouting.
- * Fixed possible skb leak in rtnetlink funcs.
- * Steve Whitehouse : Dave Miller's dynamic hash table sizing and
- * Alexey Kuznetsov's finer grained locking
- * from ipv4/route.c.
- * Steve Whitehouse : Routing is now starting to look like a
- * sensible set of code now, mainly due to
- * my copying the IPv4 routing code. The
- * hooks here are modified and will continue
- * to evolve for a while.
- * Steve Whitehouse : Real SMP at last :-) Also new netfilter
- * stuff. Look out raw sockets your days
- * are numbered!
- * Steve Whitehouse : Added return-to-sender functions. Added
- * backlog congestion level return codes.
- * Steve Whitehouse : Fixed bug where routes were set up with
- * no ref count on net devices.
- * Steve Whitehouse : RCU for the route cache
- * Steve Whitehouse : Preparations for the flow cache
- * Steve Whitehouse : Prepare for nonlinear skbs
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/in_route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/mm.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/rtnetlink.h>
-#include <linux/string.h>
-#include <linux/netfilter_decnet.h>
-#include <linux/rcupdate.h>
-#include <linux/times.h>
-#include <linux/export.h>
-#include <asm/errno.h>
-#include <net/net_namespace.h>
-#include <net/netlink.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_nsp.h>
-#include <net/dn_route.h>
-#include <net/dn_neigh.h>
-#include <net/dn_fib.h>
-
-struct dn_rt_hash_bucket {
- struct dn_route __rcu *chain;
- spinlock_t lock;
-};
-
-extern struct neigh_table dn_neigh_table;
-
-
-static unsigned char dn_hiord_addr[6] = {0xAA, 0x00, 0x04, 0x00, 0x00, 0x00};
-
-static const int dn_rt_min_delay = 2 * HZ;
-static const int dn_rt_max_delay = 10 * HZ;
-static const int dn_rt_mtu_expires = 10 * 60 * HZ;
-
-static unsigned long dn_rt_deadline;
-
-static int dn_dst_gc(struct dst_ops *ops);
-static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
-static unsigned int dn_dst_mtu(const struct dst_entry *dst);
-static void dn_dst_destroy(struct dst_entry *);
-static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
-static void dn_dst_link_failure(struct sk_buff *);
-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb , u32 mtu,
- bool confirm_neigh);
-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb);
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr);
-static int dn_route_input(struct sk_buff *);
-static void dn_run_flush(struct timer_list *unused);
-
-static struct dn_rt_hash_bucket *dn_rt_hash_table;
-static unsigned int dn_rt_hash_mask;
-
-static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
-int decnet_dst_gc_interval = 2;
-
-static struct dst_ops dn_dst_ops = {
- .family = PF_DECnet,
- .gc_thresh = 128,
- .gc = dn_dst_gc,
- .check = dn_dst_check,
- .default_advmss = dn_dst_default_advmss,
- .mtu = dn_dst_mtu,
- .cow_metrics = dst_cow_metrics_generic,
- .destroy = dn_dst_destroy,
- .ifdown = dn_dst_ifdown,
- .negative_advice = dn_dst_negative_advice,
- .link_failure = dn_dst_link_failure,
- .update_pmtu = dn_dst_update_pmtu,
- .redirect = dn_dst_redirect,
- .neigh_lookup = dn_dst_neigh_lookup,
-};
-
-static void dn_dst_destroy(struct dst_entry *dst)
-{
- struct dn_route *rt = (struct dn_route *) dst;
-
- if (rt->n)
- neigh_release(rt->n);
- dst_destroy_metrics_generic(dst);
-}
-
-static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how)
-{
- if (how) {
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
-
- if (n && n->dev == dev) {
- n->dev = blackhole_netdev;
- dev_hold(n->dev);
- dev_put(dev);
- }
- }
-}
-
-static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
-{
- __u16 tmp = (__u16 __force)(src ^ dst);
- tmp ^= (tmp >> 3);
- tmp ^= (tmp >> 5);
- tmp ^= (tmp >> 10);
- return dn_rt_hash_mask & (unsigned int)tmp;
-}
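
The hash fold above can be tried in isolation; the mask value below is an arbitrary stand-in for dn_rt_hash_mask:

#include <stdint.h>
#include <stdio.h>

/* Same xor-fold as dn_hash(): fold the source/destination pair down to a
 * few bits and mask to the table size.
 */
static unsigned int dn_hash_demo(uint16_t src, uint16_t dst, unsigned int mask)
{
    uint16_t tmp = src ^ dst;

    tmp ^= tmp >> 3;
    tmp ^= tmp >> 5;
    tmp ^= tmp >> 10;
    return mask & (unsigned int)tmp;
}

int main(void)
{
    printf("bucket = %u\n", dn_hash_demo(0x0401, 0x0802, 0xff));
    return 0;
}
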
-
-static void dn_dst_check_expire(struct timer_list *unused)
-{
- int i;
- struct dn_route *rt;
- struct dn_route __rcu **rtp;
- unsigned long now = jiffies;
- unsigned long expire = 120 * HZ;
-
- for (i = 0; i <= dn_rt_hash_mask; i++) {
- rtp = &dn_rt_hash_table[i].chain;
-
- spin_lock(&dn_rt_hash_table[i].lock);
- while ((rt = rcu_dereference_protected(*rtp,
- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
- if (atomic_read(&rt->dst.__refcnt) > 1 ||
- (now - rt->dst.lastuse) < expire) {
- rtp = &rt->dn_next;
- continue;
- }
- *rtp = rt->dn_next;
- rt->dn_next = NULL;
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
- spin_unlock(&dn_rt_hash_table[i].lock);
-
- if (jiffies != now)
- break;
- }
-
- mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
-}
-
-static int dn_dst_gc(struct dst_ops *ops)
-{
- struct dn_route *rt;
- struct dn_route __rcu **rtp;
- int i;
- unsigned long now = jiffies;
- unsigned long expire = 10 * HZ;
-
- for (i = 0; i <= dn_rt_hash_mask; i++) {
-
- spin_lock_bh(&dn_rt_hash_table[i].lock);
- rtp = &dn_rt_hash_table[i].chain;
-
- while ((rt = rcu_dereference_protected(*rtp,
- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
- if (atomic_read(&rt->dst.__refcnt) > 1 ||
- (now - rt->dst.lastuse) < expire) {
- rtp = &rt->dn_next;
- continue;
- }
- *rtp = rt->dn_next;
- rt->dn_next = NULL;
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- break;
- }
- spin_unlock_bh(&dn_rt_hash_table[i].lock);
- }
-
- return 0;
-}
-
-/*
- * The decnet standards don't impose a particular minimum mtu; what they
- * do insist on is that the routing layer accepts a datagram at least
- * 230 bytes long. Here we have to subtract the routing header length from
- * 230 to get the minimum acceptable mtu. If there is no neighbour, then we
- * assume the worst and use a long header size.
- *
- * We update both the mtu and the advertised mss (i.e. the segment size we
- * advertise to the other end).
- */
-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu,
- bool confirm_neigh)
-{
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
- u32 min_mtu = 230;
- struct dn_dev *dn;
-
- dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL;
-
- if (dn && dn->use_long == 0)
- min_mtu -= 6;
- else
- min_mtu -= 21;
-
- if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
- if (!(dst_metric_locked(dst, RTAX_MTU))) {
- dst_metric_set(dst, RTAX_MTU, mtu);
- dst_set_expires(dst, dn_rt_mtu_expires);
- }
- if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
- u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
- u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS);
- if (!existing_mss || existing_mss > mss)
- dst_metric_set(dst, RTAX_ADVMSS, mss);
- }
- }
-}
-
-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb)
-{
-}
-
-/*
- * Called when a route has been marked obsolete (e.g. a routing cache flush).
- */
-static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie)
-{
- return NULL;
-}
-
-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
-{
- dst_release(dst);
- return NULL;
-}
-
-static void dn_dst_link_failure(struct sk_buff *skb)
-{
-}
-
-static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
-{
- return ((fl1->daddr ^ fl2->daddr) |
- (fl1->saddr ^ fl2->saddr) |
- (fl1->flowidn_mark ^ fl2->flowidn_mark) |
- (fl1->flowidn_scope ^ fl2->flowidn_scope) |
- (fl1->flowidn_oif ^ fl2->flowidn_oif) |
- (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
-}
-
-static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp)
-{
- struct dn_route *rth;
- struct dn_route __rcu **rthp;
- unsigned long now = jiffies;
-
- rthp = &dn_rt_hash_table[hash].chain;
-
- spin_lock_bh(&dn_rt_hash_table[hash].lock);
- while ((rth = rcu_dereference_protected(*rthp,
- lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
- if (compare_keys(&rth->fld, &rt->fld)) {
- /* Put it first */
- *rthp = rth->dn_next;
- rcu_assign_pointer(rth->dn_next,
- dn_rt_hash_table[hash].chain);
- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
-
- dst_hold_and_use(&rth->dst, now);
- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
-
- dst_release_immediate(&rt->dst);
- *rp = rth;
- return 0;
- }
- rthp = &rth->dn_next;
- }
-
- rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
-
- dst_hold_and_use(&rt->dst, now);
- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
- *rp = rt;
- return 0;
-}
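
dn_insert_route() above keeps each hash chain in most-recently-used order: if
an entry with the same flow key is already present it is unlinked, pushed back
on at the head and reused, and the freshly built route is dropped; otherwise
the new route simply becomes the new head. The following is a standalone,
single-threaded sketch of that policy only (no RCU, locking or reference
counting, and plain integer keys instead of flow keys).

#include <stdio.h>

struct entry {
	int key;
	struct entry *next;
};

static struct entry *chain;

/* Insert 'e'; if an entry with the same key already exists, move it to the
 * head of the chain and return it instead (the caller's duplicate is simply
 * not linked). */
static struct entry *insert(struct entry *e)
{
	struct entry **pp = &chain, *cur;

	while ((cur = *pp) != NULL) {
		if (cur->key == e->key) {
			*pp = cur->next;	/* unlink ...            */
			cur->next = chain;	/* ... and move to front */
			chain = cur;
			return cur;
		}
		pp = &cur->next;
	}
	e->next = chain;			/* no match: new head */
	chain = e;
	return e;
}

int main(void)
{
	static struct entry e0 = { 0 }, e1 = { 1 }, e2 = { 2 }, dup = { 1 };
	struct entry *cur;

	insert(&e0);
	insert(&e1);
	insert(&e2);
	insert(&dup);		/* key 1 already present: e1 moves to the head */

	for (cur = chain; cur; cur = cur->next)
		printf("%d ", cur->key);	/* prints: 1 2 0 */
	printf("\n");
	return 0;
}
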
-
-static void dn_run_flush(struct timer_list *unused)
-{
- int i;
- struct dn_route *rt, *next;
-
- for (i = 0; i < dn_rt_hash_mask; i++) {
- spin_lock_bh(&dn_rt_hash_table[i].lock);
-
- rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL);
- if (!rt)
- goto nothing_to_declare;
-
- for (; rt; rt = next) {
- next = rcu_dereference_raw(rt->dn_next);
- RCU_INIT_POINTER(rt->dn_next, NULL);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
-
-nothing_to_declare:
- spin_unlock_bh(&dn_rt_hash_table[i].lock);
- }
-}
-
-static DEFINE_SPINLOCK(dn_rt_flush_lock);
-
-void dn_rt_cache_flush(int delay)
-{
- unsigned long now = jiffies;
- int user_mode = !in_interrupt();
-
- if (delay < 0)
- delay = dn_rt_min_delay;
-
- spin_lock_bh(&dn_rt_flush_lock);
-
- if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) {
- long tmo = (long)(dn_rt_deadline - now);
-
- if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay)
- tmo = 0;
-
- if (delay > tmo)
- delay = tmo;
- }
-
- if (delay <= 0) {
- spin_unlock_bh(&dn_rt_flush_lock);
- dn_run_flush(NULL);
- return;
- }
-
- if (dn_rt_deadline == 0)
- dn_rt_deadline = now + dn_rt_max_delay;
-
- dn_rt_flush_timer.expires = now + delay;
- add_timer(&dn_rt_flush_timer);
- spin_unlock_bh(&dn_rt_flush_lock);
-}
-
-/**
- * dn_return_short - Return a short packet to its sender
- * @skb: The packet to return
- *
- */
-static int dn_return_short(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- unsigned char *ptr;
- __le16 *src;
- __le16 *dst;
-
- /* Add back headers */
- skb_push(skb, skb->data - skb_network_header(skb));
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- return NET_RX_DROP;
-
- cb = DN_SKB_CB(skb);
- /* Skip packet length and point to flags */
- ptr = skb->data + 2;
- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
-
- dst = (__le16 *)ptr;
- ptr += 2;
- src = (__le16 *)ptr;
- ptr += 2;
- *ptr = 0; /* Zero hop count */
-
- swap(*src, *dst);
-
- skb->pkt_type = PACKET_OUTGOING;
- dn_rt_finish_output(skb, NULL, NULL);
- return NET_RX_SUCCESS;
-}
-
-/**
- * dn_return_long - Return a long packet to its sender
- * @skb: The long format packet to return
- *
- */
-static int dn_return_long(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- unsigned char *ptr;
- unsigned char *src_addr, *dst_addr;
- unsigned char tmp[ETH_ALEN];
-
- /* Add back all headers */
- skb_push(skb, skb->data - skb_network_header(skb));
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- return NET_RX_DROP;
-
- cb = DN_SKB_CB(skb);
- /* Ignore packet length and point to flags */
- ptr = skb->data + 2;
-
- /* Skip padding */
- if (*ptr & DN_RT_F_PF) {
- char padlen = (*ptr & ~DN_RT_F_PF);
- ptr += padlen;
- }
-
- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
- ptr += 2;
- dst_addr = ptr;
- ptr += 8;
- src_addr = ptr;
- ptr += 6;
- *ptr = 0; /* Zero hop count */
-
- /* Swap source and destination */
- memcpy(tmp, src_addr, ETH_ALEN);
- memcpy(src_addr, dst_addr, ETH_ALEN);
- memcpy(dst_addr, tmp, ETH_ALEN);
-
- skb->pkt_type = PACKET_OUTGOING;
- dn_rt_finish_output(skb, dst_addr, src_addr);
- return NET_RX_SUCCESS;
-}
-
-/**
- * dn_route_rx_packet - Try and find a route for an incoming packet
- * @net: The applicable net namespace
- * @sk: Socket packet transmitted on
- * @skb: The packet to find a route for
- *
- * Returns: result of input function if route is found, error code otherwise
- */
-static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- int err;
-
- err = dn_route_input(skb);
- if (err == 0)
- return dst_input(skb);
-
- cb = DN_SKB_CB(skb);
- if (decnet_debug_level & 4) {
- char *devname = skb->dev ? skb->dev->name : "???";
-
- printk(KERN_DEBUG
- "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
- (int)cb->rt_flags, devname, skb->len,
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
- err, skb->pkt_type);
- }
-
- if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) {
- switch (cb->rt_flags & DN_RT_PKT_MSK) {
- case DN_RT_PKT_SHORT:
- return dn_return_short(skb);
- case DN_RT_PKT_LONG:
- return dn_return_long(skb);
- }
- }
-
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static int dn_route_rx_long(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned char *ptr = skb->data;
-
- if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */
- goto drop_it;
-
- skb_pull(skb, 20);
- skb_reset_transport_header(skb);
-
- /* Destination info */
- ptr += 2;
- cb->dst = dn_eth2dn(ptr);
- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
- goto drop_it;
- ptr += 6;
-
-
- /* Source info */
- ptr += 2;
- cb->src = dn_eth2dn(ptr);
- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
- goto drop_it;
- ptr += 6;
- /* Other junk */
- ptr++;
- cb->hops = *ptr++; /* Visit Count */
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_rx_packet);
-
-drop_it:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-
-
-static int dn_route_rx_short(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned char *ptr = skb->data;
-
- if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */
- goto drop_it;
-
- skb_pull(skb, 5);
- skb_reset_transport_header(skb);
-
- cb->dst = *(__le16 *)ptr;
- ptr += 2;
- cb->src = *(__le16 *)ptr;
- ptr += 2;
- cb->hops = *ptr & 0x3f;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_rx_packet);
-
-drop_it:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- /*
- * I know we drop the packet here, but that's considered success in
- * this case
- */
- kfree_skb(skb);
- return NET_RX_SUCCESS;
-}
-
-static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- dn_dev_hello(skb);
- dn_neigh_pointopoint_hello(skb);
- return NET_RX_SUCCESS;
-}
-
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
-{
- struct dn_skb_cb *cb;
- unsigned char flags = 0;
- __u16 len = le16_to_cpu(*(__le16 *)skb->data);
- struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
- unsigned char padlen = 0;
-
- if (!net_eq(dev_net(dev), &init_net))
- goto dump_it;
-
- if (dn == NULL)
- goto dump_it;
-
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (!skb)
- goto out;
-
- if (!pskb_may_pull(skb, 3))
- goto dump_it;
-
- skb_pull(skb, 2);
-
- if (len > skb->len)
- goto dump_it;
-
- skb_trim(skb, len);
-
- flags = *skb->data;
-
- cb = DN_SKB_CB(skb);
- cb->stamp = jiffies;
- cb->iif = dev->ifindex;
-
- /*
- * If we have padding, remove it.
- */
- if (flags & DN_RT_F_PF) {
- padlen = flags & ~DN_RT_F_PF;
- if (!pskb_may_pull(skb, padlen + 1))
- goto dump_it;
- skb_pull(skb, padlen);
- flags = *skb->data;
- }
-
- skb_reset_network_header(skb);
-
- /*
- * Weed out future version DECnet
- */
- if (flags & DN_RT_F_VER)
- goto dump_it;
-
- cb->rt_flags = flags;
-
- if (decnet_debug_level & 1)
- printk(KERN_DEBUG
- "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
- (int)flags, dev->name, len, skb->len,
- padlen);
-
- if (flags & DN_RT_PKT_CNTL) {
- if (unlikely(skb_linearize(skb)))
- goto dump_it;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_INIT:
- dn_dev_init_pkt(skb);
- break;
- case DN_RT_PKT_VERI:
- dn_dev_veri_pkt(skb);
- break;
- }
-
- if (dn->parms.state != DN_DEV_S_RU)
- goto dump_it;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_HELO:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_ptp_hello);
-
- case DN_RT_PKT_L1RT:
- case DN_RT_PKT_L2RT:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_discard);
- case DN_RT_PKT_ERTH:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_neigh_router_hello);
-
- case DN_RT_PKT_EEDH:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_neigh_endnode_hello);
- }
- } else {
- if (dn->parms.state != DN_DEV_S_RU)
- goto dump_it;
-
- skb_pull(skb, 1); /* Pull flags */
-
- switch (flags & DN_RT_PKT_MSK) {
- case DN_RT_PKT_LONG:
- return dn_route_rx_long(skb);
- case DN_RT_PKT_SHORT:
- return dn_route_rx_short(skb);
- }
- }
-
-dump_it:
- kfree_skb(skb);
-out:
- return NET_RX_DROP;
-}
-
-static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct net_device *dev = dst->dev;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- int err = -EINVAL;
-
- if (rt->n == NULL)
- goto error;
-
- skb->dev = dev;
-
- cb->src = rt->rt_saddr;
- cb->dst = rt->rt_daddr;
-
- /*
- * Always set the Intra-Ethernet bit on all outgoing packets
- * originated on this node. Only valid flag from upper layers
- * is return-to-sender-requested. Set hop count to 0 too.
- */
- cb->rt_flags &= ~DN_RT_F_RQR;
- cb->rt_flags |= DN_RT_F_IE;
- cb->hops = 0;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
- &init_net, sk, skb, NULL, dev,
- dn_to_neigh_output);
-
-error:
- net_dbg_ratelimited("dn_output: This should not happen\n");
-
- kfree_skb(skb);
-
- return err;
-}
-
-static int dn_forward(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dst_entry *dst = skb_dst(skb);
- struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
- struct dn_route *rt;
- int header_len;
- struct net_device *dev = skb->dev;
-
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
- /* Ensure that we have enough space for headers */
- rt = (struct dn_route *)skb_dst(skb);
- header_len = dn_db->use_long ? 21 : 6;
- if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
- goto drop;
-
- /*
- * Hop count exceeded.
- */
- if (++cb->hops > 30)
- goto drop;
-
- skb->dev = rt->dst.dev;
-
- /*
- * If packet goes out same interface it came in on, then set
- * the Intra-Ethernet bit. This has no effect for short
- * packets, so we don't need to test for them here.
- */
- cb->rt_flags &= ~DN_RT_F_IE;
- if (rt->rt_flags & RTCF_DOREDIRECT)
- cb->rt_flags |= DN_RT_F_IE;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
- &init_net, NULL, skb, dev, skb->dev,
- dn_to_neigh_output);
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-/*
- * Used to catch bugs. This should never normally get
- * called.
- */
-static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
-
- kfree_skb(skb);
-
- return NET_RX_DROP;
-}
-
-static int dn_rt_bug(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
-
- kfree_skb(skb);
-
- return NET_RX_DROP;
-}
-
-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
-{
- return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
-}
-
-static unsigned int dn_dst_mtu(const struct dst_entry *dst)
-{
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
-
- return mtu ? : dst->dev->mtu;
-}
-
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
-{
- return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
-}
-
-static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
-{
- struct dn_fib_info *fi = res->fi;
- struct net_device *dev = rt->dst.dev;
- unsigned int mss_metric;
- struct neighbour *n;
-
- if (fi) {
- if (DN_FIB_RES_GW(*res) &&
- DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
- rt->rt_gateway = DN_FIB_RES_GW(*res);
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
- }
- rt->rt_type = res->type;
-
- if (dev != NULL && rt->n == NULL) {
- n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
- if (IS_ERR(n))
- return PTR_ERR(n);
- rt->n = n;
- }
-
- if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
- dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
- mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
- if (mss_metric) {
- unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
- if (mss_metric > mss)
- dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
- }
- return 0;
-}
-
-static inline int dn_match_addr(__le16 addr1, __le16 addr2)
-{
- __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
- int match = 16;
- while (tmp) {
- tmp >>= 1;
- match--;
- }
- return match;
-}
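
dn_match_addr() above scores how closely two addresses agree by XOR-ing them
and shifting the difference out one bit at a time, which yields the number of
leading bits the two 16-bit values have in common (16 for identical addresses,
0 when the top bit already differs). A userspace sketch with a couple of
example values follows; host byte order is assumed here, whereas the kernel
version runs on le16-converted addresses.

#include <stdio.h>
#include <stdint.h>

/* Number of leading bits shared by a and b. */
static int match_len(uint16_t a, uint16_t b)
{
	uint16_t tmp = a ^ b;
	int match = 16;

	while (tmp) {
		tmp >>= 1;
		match--;
	}
	return match;
}

int main(void)
{
	printf("%d\n", match_len(0x0401, 0x0401));	/* 16: identical */
	printf("%d\n", match_len(0x0401, 0x0403));	/* 14: differ in bit 1 */
	printf("%d\n", match_len(0x0401, 0x8401));	/*  0: differ in the top bit */
	return 0;
}
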
-
-static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
-{
- __le16 saddr = 0;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int best_match = 0;
- int ret;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa != NULL;
- ifa = rcu_dereference(ifa->ifa_next)) {
- if (ifa->ifa_scope > scope)
- continue;
- if (!daddr) {
- saddr = ifa->ifa_local;
- break;
- }
- ret = dn_match_addr(daddr, ifa->ifa_local);
- if (ret > best_match)
- saddr = ifa->ifa_local;
- if (best_match == 0)
- saddr = ifa->ifa_local;
- }
- rcu_read_unlock();
-
- return saddr;
-}
-
-static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res)
-{
- return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope);
-}
-
-static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res)
-{
- __le16 mask = dnet_make_mask(res->prefixlen);
- return (daddr&~mask)|res->fi->fib_nh->nh_gw;
-}
-
-static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
-{
- struct flowidn fld = {
- .daddr = oldflp->daddr,
- .saddr = oldflp->saddr,
- .flowidn_scope = RT_SCOPE_UNIVERSE,
- .flowidn_mark = oldflp->flowidn_mark,
- .flowidn_iif = LOOPBACK_IFINDEX,
- .flowidn_oif = oldflp->flowidn_oif,
- };
- struct dn_route *rt = NULL;
- struct net_device *dev_out = NULL, *dev;
- struct neighbour *neigh = NULL;
- unsigned int hash;
- unsigned int flags = 0;
- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST };
- int err;
- int free_res = 0;
- __le16 gateway = 0;
-
- if (decnet_debug_level & 16)
- printk(KERN_DEBUG
- "dn_route_output_slow: dst=%04x src=%04x mark=%d"
- " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
- le16_to_cpu(oldflp->saddr),
- oldflp->flowidn_mark, LOOPBACK_IFINDEX,
- oldflp->flowidn_oif);
-
-	/* If we have an output interface, verify it's a DECnet device */
- if (oldflp->flowidn_oif) {
- dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
- err = -ENODEV;
- if (dev_out && dev_out->dn_ptr == NULL) {
- dev_put(dev_out);
- dev_out = NULL;
- }
- if (dev_out == NULL)
- goto out;
- }
-
-	/* If we have a source address, verify that it's a local address */
- if (oldflp->saddr) {
- err = -EADDRNOTAVAIL;
-
- if (dev_out) {
- if (dn_dev_islocal(dev_out, oldflp->saddr))
- goto source_ok;
- dev_put(dev_out);
- goto out;
- }
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (!dev->dn_ptr)
- continue;
- if (!dn_dev_islocal(dev, oldflp->saddr))
- continue;
- if ((dev->flags & IFF_LOOPBACK) &&
- oldflp->daddr &&
- !dn_dev_islocal(dev, oldflp->daddr))
- continue;
-
- dev_out = dev;
- break;
- }
- rcu_read_unlock();
- if (dev_out == NULL)
- goto out;
- dev_hold(dev_out);
-source_ok:
- ;
- }
-
-	/* No destination? Assume it's local */
- if (!fld.daddr) {
- fld.daddr = fld.saddr;
-
- dev_put(dev_out);
- err = -EINVAL;
- dev_out = init_net.loopback_dev;
- if (!dev_out->dn_ptr)
- goto out;
- err = -EADDRNOTAVAIL;
- dev_hold(dev_out);
- if (!fld.daddr) {
- fld.daddr =
- fld.saddr = dnet_select_source(dev_out, 0,
- RT_SCOPE_HOST);
- if (!fld.daddr)
- goto done;
- }
- fld.flowidn_oif = LOOPBACK_IFINDEX;
- res.type = RTN_LOCAL;
- goto make_route;
- }
-
- if (decnet_debug_level & 16)
- printk(KERN_DEBUG
- "dn_route_output_slow: initial checks complete."
- " dst=%04x src=%04x oif=%d try_hard=%d\n",
- le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
- fld.flowidn_oif, try_hard);
-
- /*
- * N.B. If the kernel is compiled without router support then
- * dn_fib_lookup() will evaluate to non-zero so this if () block
- * will always be executed.
- */
- err = -ESRCH;
- if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
- struct dn_dev *dn_db;
- if (err != -ESRCH)
- goto out;
- /*
- * Here the fallback is basically the standard algorithm for
- * routing in endnodes which is described in the DECnet routing
- * docs
- *
- * If we are not trying hard, look in neighbour cache.
- * The result is tested to ensure that if a specific output
- * device/source address was requested, then we honour that
- * here
- */
- if (!try_hard) {
- neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
- if (neigh) {
- if ((oldflp->flowidn_oif &&
- (neigh->dev->ifindex != oldflp->flowidn_oif)) ||
- (oldflp->saddr &&
- (!dn_dev_islocal(neigh->dev,
- oldflp->saddr)))) {
- neigh_release(neigh);
- neigh = NULL;
- } else {
- dev_put(dev_out);
- if (dn_dev_islocal(neigh->dev, fld.daddr)) {
- dev_out = init_net.loopback_dev;
- res.type = RTN_LOCAL;
- } else {
- dev_out = neigh->dev;
- }
- dev_hold(dev_out);
- goto select_source;
- }
- }
- }
-
-		/* Not there? Perhaps it's a local address */
- if (dev_out == NULL)
- dev_out = dn_dev_get_default();
- err = -ENODEV;
- if (dev_out == NULL)
- goto out;
- dn_db = rcu_dereference_raw(dev_out->dn_ptr);
- if (!dn_db)
- goto e_inval;
- /* Possible improvement - check all devices for local addr */
- if (dn_dev_islocal(dev_out, fld.daddr)) {
- dev_put(dev_out);
- dev_out = init_net.loopback_dev;
- dev_hold(dev_out);
- res.type = RTN_LOCAL;
- goto select_source;
- }
- /* Not local either.... try sending it to the default router */
- neigh = neigh_clone(dn_db->router);
- BUG_ON(neigh && neigh->dev != dev_out);
-
-		/* Ok then, we assume it's directly connected and move on */
-select_source:
- if (neigh)
- gateway = container_of(neigh, struct dn_neigh, n)->addr;
- if (gateway == 0)
- gateway = fld.daddr;
- if (fld.saddr == 0) {
- fld.saddr = dnet_select_source(dev_out, gateway,
- res.type == RTN_LOCAL ?
- RT_SCOPE_HOST :
- RT_SCOPE_LINK);
- if (fld.saddr == 0 && res.type != RTN_LOCAL)
- goto e_addr;
- }
- fld.flowidn_oif = dev_out->ifindex;
- goto make_route;
- }
- free_res = 1;
-
- if (res.type == RTN_NAT)
- goto e_inval;
-
- if (res.type == RTN_LOCAL) {
- if (!fld.saddr)
- fld.saddr = fld.daddr;
- dev_put(dev_out);
- dev_out = init_net.loopback_dev;
- dev_hold(dev_out);
- if (!dev_out->dn_ptr)
- goto e_inval;
- fld.flowidn_oif = dev_out->ifindex;
- if (res.fi)
- dn_fib_info_put(res.fi);
- res.fi = NULL;
- goto make_route;
- }
-
- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
- dn_fib_select_multipath(&fld, &res);
-
- /*
- * We could add some logic to deal with default routes here and
- * get rid of some of the special casing above.
- */
-
- if (!fld.saddr)
- fld.saddr = DN_FIB_RES_PREFSRC(res);
-
- dev_put(dev_out);
- dev_out = DN_FIB_RES_DEV(res);
- dev_hold(dev_out);
- fld.flowidn_oif = dev_out->ifindex;
- gateway = DN_FIB_RES_GW(res);
-
-make_route:
- if (dev_out->flags & IFF_LOOPBACK)
- flags |= RTCF_LOCAL;
-
- rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, 0);
- if (rt == NULL)
- goto e_nobufs;
-
- rt->dn_next = NULL;
- memset(&rt->fld, 0, sizeof(rt->fld));
- rt->fld.saddr = oldflp->saddr;
- rt->fld.daddr = oldflp->daddr;
- rt->fld.flowidn_oif = oldflp->flowidn_oif;
- rt->fld.flowidn_iif = 0;
- rt->fld.flowidn_mark = oldflp->flowidn_mark;
-
- rt->rt_saddr = fld.saddr;
- rt->rt_daddr = fld.daddr;
- rt->rt_gateway = gateway ? gateway : fld.daddr;
- rt->rt_local_src = fld.saddr;
-
- rt->rt_dst_map = fld.daddr;
- rt->rt_src_map = fld.saddr;
-
- rt->n = neigh;
- neigh = NULL;
-
- rt->dst.lastuse = jiffies;
- rt->dst.output = dn_output;
- rt->dst.input = dn_rt_bug;
- rt->rt_flags = flags;
- if (flags & RTCF_LOCAL)
- rt->dst.input = dn_nsp_rx;
-
- err = dn_rt_set_next_hop(rt, &res);
- if (err)
- goto e_neighbour;
-
- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
- /* dn_insert_route() increments dst->__refcnt */
- dn_insert_route(rt, hash, (struct dn_route **)pprt);
-
-done:
- if (neigh)
- neigh_release(neigh);
- if (free_res)
- dn_fib_res_put(&res);
- dev_put(dev_out);
-out:
- return err;
-
-e_addr:
- err = -EADDRNOTAVAIL;
- goto done;
-e_inval:
- err = -EINVAL;
- goto done;
-e_nobufs:
- err = -ENOBUFS;
- goto done;
-e_neighbour:
- dst_release_immediate(&rt->dst);
- goto e_nobufs;
-}
-
-
-/*
- * N.B. The flags may be moved into the flowi at some future stage.
- */
-static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
-{
- unsigned int hash = dn_hash(flp->saddr, flp->daddr);
- struct dn_route *rt = NULL;
-
- if (!(flags & MSG_TRYHARD)) {
- rcu_read_lock_bh();
- for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
- rt = rcu_dereference_bh(rt->dn_next)) {
- if ((flp->daddr == rt->fld.daddr) &&
- (flp->saddr == rt->fld.saddr) &&
- (flp->flowidn_mark == rt->fld.flowidn_mark) &&
- dn_is_output_route(rt) &&
- (rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_hold_and_use(&rt->dst, jiffies);
- rcu_read_unlock_bh();
- *pprt = &rt->dst;
- return 0;
- }
- }
- rcu_read_unlock_bh();
- }
-
- return dn_route_output_slow(pprt, flp, flags);
-}
-
-static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
-{
- int err;
-
- err = __dn_route_output_key(pprt, flp, flags);
- if (err == 0 && flp->flowidn_proto) {
- *pprt = xfrm_lookup(&init_net, *pprt,
- flowidn_to_flowi(flp), NULL, 0);
- if (IS_ERR(*pprt)) {
- err = PTR_ERR(*pprt);
- *pprt = NULL;
- }
- }
- return err;
-}
-
-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags)
-{
- int err;
-
- err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
- if (err == 0 && fl->flowidn_proto) {
- *pprt = xfrm_lookup(&init_net, *pprt,
- flowidn_to_flowi(fl), sk, 0);
- if (IS_ERR(*pprt)) {
- err = PTR_ERR(*pprt);
- *pprt = NULL;
- }
- }
- return err;
-}
-
-static int dn_route_input_slow(struct sk_buff *skb)
-{
- struct dn_route *rt = NULL;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct net_device *in_dev = skb->dev;
- struct net_device *out_dev = NULL;
- struct dn_dev *dn_db;
- struct neighbour *neigh = NULL;
- unsigned int hash;
- int flags = 0;
- __le16 gateway = 0;
- __le16 local_src = 0;
- struct flowidn fld = {
- .daddr = cb->dst,
- .saddr = cb->src,
- .flowidn_scope = RT_SCOPE_UNIVERSE,
- .flowidn_mark = skb->mark,
- .flowidn_iif = skb->dev->ifindex,
- };
- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
- int err = -EINVAL;
- int free_res = 0;
-
- dev_hold(in_dev);
-
- dn_db = rcu_dereference(in_dev->dn_ptr);
- if (!dn_db)
- goto out;
-
- /* Zero source addresses are not allowed */
- if (fld.saddr == 0)
- goto out;
-
- /*
- * In this case we've just received a packet from a source
- * outside ourselves pretending to come from us. We don't
- * allow it any further to prevent routing loops, spoofing and
- * other nasties. Loopback packets already have the dst attached
- * so this only affects packets which have originated elsewhere.
- */
- err = -ENOTUNIQ;
- if (dn_dev_islocal(in_dev, cb->src))
- goto out;
-
- err = dn_fib_lookup(&fld, &res);
- if (err) {
- if (err != -ESRCH)
- goto out;
- /*
- * Is the destination us ?
- */
- if (!dn_dev_islocal(in_dev, cb->dst))
- goto e_inval;
-
- res.type = RTN_LOCAL;
- } else {
- __le16 src_map = fld.saddr;
- free_res = 1;
-
- out_dev = DN_FIB_RES_DEV(res);
- if (out_dev == NULL) {
- net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n");
- goto e_inval;
- }
- dev_hold(out_dev);
-
- if (res.r)
- src_map = fld.saddr; /* no NAT support for now */
-
- gateway = DN_FIB_RES_GW(res);
- if (res.type == RTN_NAT) {
- fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
- dn_fib_res_put(&res);
- free_res = 0;
- if (dn_fib_lookup(&fld, &res))
- goto e_inval;
- free_res = 1;
- if (res.type != RTN_UNICAST)
- goto e_inval;
- flags |= RTCF_DNAT;
- gateway = fld.daddr;
- }
- fld.saddr = src_map;
- }
-
- switch (res.type) {
- case RTN_UNICAST:
- /*
-		 * Forwarding check here; we only check for forwarding
-		 * being turned off. If you want to forward only intra
-		 * area, it's up to you to set the routing tables up
- * correctly.
- */
- if (dn_db->parms.forwarding == 0)
- goto e_inval;
-
- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
- dn_fib_select_multipath(&fld, &res);
-
- /*
- * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
- * flag as a hint to set the intra-ethernet bit when
- * forwarding. If we've got NAT in operation, we don't do
- * this optimisation.
- */
- if (out_dev == in_dev && !(flags & RTCF_NAT))
- flags |= RTCF_DOREDIRECT;
-
- local_src = DN_FIB_RES_PREFSRC(res);
- break;
- case RTN_BLACKHOLE:
- case RTN_UNREACHABLE:
- break;
- case RTN_LOCAL:
- flags |= RTCF_LOCAL;
- fld.saddr = cb->dst;
- fld.daddr = cb->src;
-
- /* Routing tables gave us a gateway */
- if (gateway)
- goto make_route;
-
-		/* Packet was intra-ethernet, so we know it's on-link */
- if (cb->rt_flags & DN_RT_F_IE) {
- gateway = cb->src;
- goto make_route;
- }
-
- /* Use the default router if there is one */
- neigh = neigh_clone(dn_db->router);
- if (neigh) {
- gateway = container_of(neigh, struct dn_neigh, n)->addr;
- goto make_route;
- }
-
- /* Close eyes and pray */
- gateway = cb->src;
- goto make_route;
- default:
- goto e_inval;
- }
-
-make_route:
- rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, 0);
- if (rt == NULL)
- goto e_nobufs;
-
- rt->dn_next = NULL;
- memset(&rt->fld, 0, sizeof(rt->fld));
- rt->rt_saddr = fld.saddr;
- rt->rt_daddr = fld.daddr;
- rt->rt_gateway = fld.daddr;
- if (gateway)
- rt->rt_gateway = gateway;
- rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
-
- rt->rt_dst_map = fld.daddr;
- rt->rt_src_map = fld.saddr;
-
- rt->fld.saddr = cb->src;
- rt->fld.daddr = cb->dst;
- rt->fld.flowidn_oif = 0;
- rt->fld.flowidn_iif = in_dev->ifindex;
- rt->fld.flowidn_mark = fld.flowidn_mark;
-
- rt->n = neigh;
- rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug_out;
- switch (res.type) {
- case RTN_UNICAST:
- rt->dst.input = dn_forward;
- break;
- case RTN_LOCAL:
- rt->dst.output = dn_output;
- rt->dst.input = dn_nsp_rx;
- rt->dst.dev = in_dev;
- flags |= RTCF_LOCAL;
- break;
- default:
- case RTN_UNREACHABLE:
- case RTN_BLACKHOLE:
- rt->dst.input = dst_discard;
- }
- rt->rt_flags = flags;
-
- err = dn_rt_set_next_hop(rt, &res);
- if (err)
- goto e_neighbour;
-
- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
- /* dn_insert_route() increments dst->__refcnt */
- dn_insert_route(rt, hash, &rt);
- skb_dst_set(skb, &rt->dst);
-
-done:
- if (neigh)
- neigh_release(neigh);
- if (free_res)
- dn_fib_res_put(&res);
- dev_put(in_dev);
- dev_put(out_dev);
-out:
- return err;
-
-e_inval:
- err = -EINVAL;
- goto done;
-
-e_nobufs:
- err = -ENOBUFS;
- goto done;
-
-e_neighbour:
- dst_release_immediate(&rt->dst);
- goto done;
-}
-
-static int dn_route_input(struct sk_buff *skb)
-{
- struct dn_route *rt;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned int hash = dn_hash(cb->src, cb->dst);
-
- if (skb_dst(skb))
- return 0;
-
- rcu_read_lock();
- for (rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
- rt = rcu_dereference(rt->dn_next)) {
- if ((rt->fld.saddr == cb->src) &&
- (rt->fld.daddr == cb->dst) &&
- (rt->fld.flowidn_oif == 0) &&
- (rt->fld.flowidn_mark == skb->mark) &&
- (rt->fld.flowidn_iif == cb->iif)) {
- dst_hold_and_use(&rt->dst, jiffies);
- rcu_read_unlock();
- skb_dst_set(skb, (struct dst_entry *)rt);
- return 0;
- }
- }
- rcu_read_unlock();
-
- return dn_route_input_slow(skb);
-}
-
-static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, int nowait, unsigned int flags)
-{
- struct dn_route *rt = (struct dn_route *)skb_dst(skb);
- struct rtmsg *r;
- struct nlmsghdr *nlh;
- long expires;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- r->rtm_family = AF_DECnet;
- r->rtm_dst_len = 16;
- r->rtm_src_len = 0;
- r->rtm_tos = 0;
- r->rtm_table = RT_TABLE_MAIN;
- r->rtm_type = rt->rt_type;
- r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
- r->rtm_scope = RT_SCOPE_UNIVERSE;
- r->rtm_protocol = RTPROT_UNSPEC;
-
- if (rt->rt_flags & RTCF_NOTIFY)
- r->rtm_flags |= RTM_F_NOTIFY;
-
- if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 ||
- nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0)
- goto errout;
-
- if (rt->fld.saddr) {
- r->rtm_src_len = 16;
- if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0)
- goto errout;
- }
- if (rt->dst.dev &&
- nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0)
- goto errout;
-
- /*
- * Note to self - change this if input routes reverse direction when
- * they deal only with inputs and not with replies like they do
- * currently.
- */
- if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0)
- goto errout;
-
- if (rt->rt_daddr != rt->rt_gateway &&
- nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0)
- goto errout;
-
- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
- goto errout;
-
- expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
- rt->dst.error) < 0)
- goto errout;
-
- if (dn_is_input_route(rt) &&
- nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0)
- goto errout;
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
- [RTA_DST] = { .type = NLA_U16 },
- [RTA_SRC] = { .type = NLA_U16 },
- [RTA_IIF] = { .type = NLA_U32 },
- [RTA_OIF] = { .type = NLA_U32 },
- [RTA_GATEWAY] = { .type = NLA_U16 },
- [RTA_PRIORITY] = { .type = NLA_U32 },
- [RTA_PREFSRC] = { .type = NLA_U16 },
- [RTA_METRICS] = { .type = NLA_NESTED },
- [RTA_MULTIPATH] = { .type = NLA_NESTED },
- [RTA_TABLE] = { .type = NLA_U32 },
- [RTA_MARK] = { .type = NLA_U32 },
-};
-
-/*
- * This is called by both endnodes and routers now.
- */
-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(in_skb->sk);
- struct rtmsg *rtm = nlmsg_data(nlh);
- struct dn_route *rt = NULL;
- struct dn_skb_cb *cb;
- int err;
- struct sk_buff *skb;
- struct flowidn fld;
- struct nlattr *tb[RTA_MAX+1];
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
- rtm_dn_policy, extack);
- if (err < 0)
- return err;
-
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_proto = DNPROTO_NSP;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb == NULL)
- return -ENOBUFS;
- skb_reset_mac_header(skb);
- cb = DN_SKB_CB(skb);
-
- if (tb[RTA_SRC])
- fld.saddr = nla_get_le16(tb[RTA_SRC]);
-
- if (tb[RTA_DST])
- fld.daddr = nla_get_le16(tb[RTA_DST]);
-
- if (tb[RTA_IIF])
- fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
-
- if (fld.flowidn_iif) {
- struct net_device *dev;
- dev = __dev_get_by_index(&init_net, fld.flowidn_iif);
- if (!dev || !dev->dn_ptr) {
- kfree_skb(skb);
- return -ENODEV;
- }
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->dev = dev;
- cb->src = fld.saddr;
- cb->dst = fld.daddr;
- local_bh_disable();
- err = dn_route_input(skb);
- local_bh_enable();
- memset(cb, 0, sizeof(struct dn_skb_cb));
- rt = (struct dn_route *)skb_dst(skb);
- if (!err && -rt->dst.error)
- err = rt->dst.error;
- } else {
- if (tb[RTA_OIF])
- fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
-
- err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
- }
-
- skb->dev = NULL;
- if (err)
- goto out_free;
- skb_dst_set(skb, &rt->dst);
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- rt->rt_flags |= RTCF_NOTIFY;
-
- err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
- if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
- }
-
- return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
-
-out_free:
- kfree_skb(skb);
- return err;
-}
-
-/*
- * For routers, this is called from dn_fib_dump, but for endnodes it's
- * called directly from the rtnetlink dispatch table.
- */
-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_route *rt;
- int h, s_h;
- int idx, s_idx;
- struct rtmsg *rtm;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg))
- return -EINVAL;
-
- rtm = nlmsg_data(cb->nlh);
- if (!(rtm->rtm_flags & RTM_F_CLONED))
- return 0;
-
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- for (h = 0; h <= dn_rt_hash_mask; h++) {
- if (h < s_h)
- continue;
- if (h > s_h)
- s_idx = 0;
- rcu_read_lock_bh();
- for (rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
- rt;
- rt = rcu_dereference_bh(rt->dn_next), idx++) {
- if (idx < s_idx)
- continue;
- skb_dst_set(skb, dst_clone(&rt->dst));
- if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE,
- 1, NLM_F_MULTI) < 0) {
- skb_dst_drop(skb);
- rcu_read_unlock_bh();
- goto done;
- }
- skb_dst_drop(skb);
- }
- rcu_read_unlock_bh();
- }
-
-done:
- cb->args[0] = h;
- cb->args[1] = idx;
- return skb->len;
-}
-
-#ifdef CONFIG_PROC_FS
-struct dn_rt_cache_iter_state {
- int bucket;
-};
-
-static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
-{
- struct dn_route *rt = NULL;
- struct dn_rt_cache_iter_state *s = seq->private;
-
- for (s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
- rcu_read_lock_bh();
- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
- if (rt)
- break;
- rcu_read_unlock_bh();
- }
- return rt;
-}
-
-static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
-{
- struct dn_rt_cache_iter_state *s = seq->private;
-
- rt = rcu_dereference_bh(rt->dn_next);
- while (!rt) {
- rcu_read_unlock_bh();
- if (--s->bucket < 0)
- break;
- rcu_read_lock_bh();
- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
- }
- return rt;
-}
-
-static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct dn_route *rt = dn_rt_cache_get_first(seq);
-
- if (rt) {
- while (*pos && (rt = dn_rt_cache_get_next(seq, rt)))
- --*pos;
- }
- return *pos ? NULL : rt;
-}
-
-static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct dn_route *rt = dn_rt_cache_get_next(seq, v);
- ++*pos;
- return rt;
-}
-
-static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v)
-{
- if (v)
- rcu_read_unlock_bh();
-}
-
-static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
-{
- struct dn_route *rt = v;
- char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
-
- seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
- rt->dst.dev ? rt->dst.dev->name : "*",
- dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
- dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
- atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, 0);
- return 0;
-}
-
-static const struct seq_operations dn_rt_cache_seq_ops = {
- .start = dn_rt_cache_seq_start,
- .next = dn_rt_cache_seq_next,
- .stop = dn_rt_cache_seq_stop,
- .show = dn_rt_cache_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-void __init dn_route_init(void)
-{
- int i, goal, order;
-
- dn_dst_ops.kmem_cachep =
- kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- dst_entries_init(&dn_dst_ops);
- timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
- dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
- add_timer(&dn_route_timer);
-
- goal = totalram_pages() >> (26 - PAGE_SHIFT);
-
- for (order = 0; (1UL << order) < goal; order++)
- /* NOTHING */;
-
- /*
- * Only want 1024 entries max, since the table is very, very unlikely
- * to be larger than that.
- */
- while (order && ((((1UL << order) * PAGE_SIZE) /
- sizeof(struct dn_rt_hash_bucket)) >= 2048))
- order--;
-
- do {
- dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
- sizeof(struct dn_rt_hash_bucket);
- while (dn_rt_hash_mask & (dn_rt_hash_mask - 1))
- dn_rt_hash_mask--;
- dn_rt_hash_table = (struct dn_rt_hash_bucket *)
- __get_free_pages(GFP_ATOMIC, order);
- } while (dn_rt_hash_table == NULL && --order > 0);
-
- if (!dn_rt_hash_table)
- panic("Failed to allocate DECnet route cache hash table\n");
-
- printk(KERN_INFO
- "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n",
- dn_rt_hash_mask,
- (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
-
- dn_rt_hash_mask--;
- for (i = 0; i <= dn_rt_hash_mask; i++) {
- spin_lock_init(&dn_rt_hash_table[i].lock);
- dn_rt_hash_table[i].chain = NULL;
- }
-
- dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
-
- proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
- &dn_rt_cache_seq_ops,
- sizeof(struct dn_rt_cache_iter_state), NULL);
-
-#ifdef CONFIG_DECNET_ROUTER
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
- dn_cache_getroute, dn_fib_dump, 0);
-#else
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
- dn_cache_getroute, dn_cache_dump, 0);
-#endif
-}
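
dn_route_init() above sizes the route cache from total RAM: it derives a
page-order goal from totalram_pages(), walks the order back down until the
table would hold fewer than 2048 buckets, and finally rounds the bucket count
down to a power of two before turning it into a mask. The standalone sketch
below mirrors that arithmetic with assumed values (4 KiB pages and a 16-byte
bucket; both are illustrative, not taken from the kernel headers).

#include <stdio.h>

#define PAGE_SZ		4096UL	/* assumed page size */
#define BUCKET_SZ	16UL	/* assumed sizeof(struct dn_rt_hash_bucket) */

static unsigned long route_hash_buckets(unsigned long total_pages)
{
	/* mirrors totalram_pages() >> (26 - PAGE_SHIFT) with PAGE_SHIFT == 12 */
	unsigned long goal = total_pages >> (26 - 12);
	unsigned long buckets;
	int order;

	for (order = 0; (1UL << order) < goal; order++)
		;
	while (order && ((1UL << order) * PAGE_SZ / BUCKET_SZ) >= 2048)
		order--;

	buckets = (1UL << order) * PAGE_SZ / BUCKET_SZ;
	while (buckets & (buckets - 1))		/* round down to a power of two */
		buckets--;
	return buckets;
}

int main(void)
{
	/* e.g. 1 GiB of 4 KiB pages -> 262144 pages -> 1024 buckets */
	printf("%lu buckets\n", route_hash_buckets(262144));
	return 0;
}
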
-
-void __exit dn_route_cleanup(void)
-{
- del_timer(&dn_route_timer);
- dn_run_flush(NULL);
-
- remove_proc_entry("decnet_cache", init_net.proc_net);
- dst_entries_destroy(&dn_dst_ops);
-}
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
deleted file mode 100644
index ee73057529cf..000000000000
--- a/net/decnet/dn_rules.c
+++ /dev/null
@@ -1,253 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Rules)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- * Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c
- *
- *
- * Changes:
- * Steve Whitehouse <steve@chygwyn.com>
- * Updated for Thomas Graf's generic rules
- *
- */
-#include <linux/net.h>
-#include <linux/init.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/rcupdate.h>
-#include <linux/export.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-static struct fib_rules_ops *dn_fib_rules_ops;
-
-struct dn_fib_rule
-{
- struct fib_rule common;
- unsigned char dst_len;
- unsigned char src_len;
- __le16 src;
- __le16 srcmask;
- __le16 dst;
- __le16 dstmask;
- __le16 srcmap;
- u8 flags;
-};
-
-
-int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res)
-{
- struct fib_lookup_arg arg = {
- .result = res,
- };
- int err;
-
- err = fib_rules_lookup(dn_fib_rules_ops,
- flowidn_to_flowi(flp), 0, &arg);
- res->r = arg.rule;
-
- return err;
-}
-
-static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
- int flags, struct fib_lookup_arg *arg)
-{
- struct flowidn *fld = &flp->u.dn;
- int err = -EAGAIN;
- struct dn_fib_table *tbl;
-
- switch(rule->action) {
- case FR_ACT_TO_TBL:
- break;
-
- case FR_ACT_UNREACHABLE:
- err = -ENETUNREACH;
- goto errout;
-
- case FR_ACT_PROHIBIT:
- err = -EACCES;
- goto errout;
-
- case FR_ACT_BLACKHOLE:
- default:
- err = -EINVAL;
- goto errout;
- }
-
- tbl = dn_fib_get_table(rule->table, 0);
- if (tbl == NULL)
- goto errout;
-
- err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result);
- if (err > 0)
- err = -EAGAIN;
-errout:
- return err;
-}
-
-static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- struct flowidn *fld = &fl->u.dn;
- __le16 daddr = fld->daddr;
- __le16 saddr = fld->saddr;
-
- if (((saddr ^ r->src) & r->srcmask) ||
- ((daddr ^ r->dst) & r->dstmask))
- return 0;
-
- return 1;
-}
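
dn_fib_rule_match() above accepts a rule when the packet's source and
destination agree with the rule's src/dst in every bit selected by the
corresponding mask, using the same XOR-and-mask idiom seen earlier. Below is a
userspace sketch with made-up mask values; the real masks come from
dnet_make_mask(), which is not reproduced here.

#include <stdio.h>
#include <stdint.h>

static int rule_matches(uint16_t saddr, uint16_t daddr,
			uint16_t rsrc, uint16_t srcmask,
			uint16_t rdst, uint16_t dstmask)
{
	/* Any selected bit that differs makes the rule miss. */
	if (((saddr ^ rsrc) & srcmask) || ((daddr ^ rdst) & dstmask))
		return 0;
	return 1;
}

int main(void)
{
	/* rule: match any source, destinations sharing the 0x0400/0xffc0 prefix */
	printf("%d\n", rule_matches(0x1234, 0x0401, 0, 0, 0x0400, 0xffc0)); /* 1 */
	printf("%d\n", rule_matches(0x1234, 0x0801, 0, 0, 0x0400, 0xffc0)); /* 0 */
	return 0;
}
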
-
-static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh,
- struct nlattr **tb,
- struct netlink_ext_ack *extack)
-{
- int err = -EINVAL;
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- if (frh->tos) {
- NL_SET_ERR_MSG(extack, "Invalid tos value");
- goto errout;
- }
-
- if (rule->table == RT_TABLE_UNSPEC) {
- if (rule->action == FR_ACT_TO_TBL) {
- struct dn_fib_table *table;
-
- table = dn_fib_empty_table();
- if (table == NULL) {
- err = -ENOBUFS;
- goto errout;
- }
-
- rule->table = table->n;
- }
- }
-
- if (frh->src_len)
- r->src = nla_get_le16(tb[FRA_SRC]);
-
- if (frh->dst_len)
- r->dst = nla_get_le16(tb[FRA_DST]);
-
- r->src_len = frh->src_len;
- r->srcmask = dnet_make_mask(r->src_len);
- r->dst_len = frh->dst_len;
- r->dstmask = dnet_make_mask(r->dst_len);
- err = 0;
-errout:
- return err;
-}
-
-static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
- struct nlattr **tb)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- if (frh->src_len && (r->src_len != frh->src_len))
- return 0;
-
- if (frh->dst_len && (r->dst_len != frh->dst_len))
- return 0;
-
- if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC])))
- return 0;
-
- if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST])))
- return 0;
-
- return 1;
-}
-
-unsigned int dnet_addr_type(__le16 addr)
-{
- struct flowidn fld = { .daddr = addr };
- struct dn_fib_res res;
- unsigned int ret = RTN_UNICAST;
- struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
-
- res.r = NULL;
-
- if (tb) {
- if (!tb->lookup(tb, &fld, &res)) {
- ret = res.type;
- dn_fib_res_put(&res);
- }
- }
- return ret;
-}
-
-static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- frh->dst_len = r->dst_len;
- frh->src_len = r->src_len;
- frh->tos = 0;
-
- if ((r->dst_len &&
- nla_put_le16(skb, FRA_DST, r->dst)) ||
- (r->src_len &&
- nla_put_le16(skb, FRA_SRC, r->src)))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -ENOBUFS;
-}
-
-static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
-{
- dn_rt_cache_flush(-1);
-}
-
-static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
- .family = AF_DECnet,
- .rule_size = sizeof(struct dn_fib_rule),
- .addr_size = sizeof(u16),
- .action = dn_fib_rule_action,
- .match = dn_fib_rule_match,
- .configure = dn_fib_rule_configure,
- .compare = dn_fib_rule_compare,
- .fill = dn_fib_rule_fill,
- .flush_cache = dn_fib_rule_flush_cache,
- .nlgroup = RTNLGRP_DECnet_RULE,
- .owner = THIS_MODULE,
- .fro_net = &init_net,
-};
-
-void __init dn_fib_rules_init(void)
-{
- dn_fib_rules_ops =
- fib_rules_register(&dn_fib_rules_ops_template, &init_net);
- BUG_ON(IS_ERR(dn_fib_rules_ops));
- BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff,
- RT_TABLE_MAIN, 0));
-}
-
-void __exit dn_fib_rules_cleanup(void)
-{
- rtnl_lock();
- fib_rules_unregister(dn_fib_rules_ops);
- rtnl_unlock();
- rcu_barrier();
-}
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
deleted file mode 100644
index 4086f9c746af..000000000000
--- a/net/decnet/dn_table.c
+++ /dev/null
@@ -1,929 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Routing Tables)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- * Mostly copied from the IPv4 routing code
- *
- *
- * Changes:
- *
- */
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/sockios.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/route.h> /* RTF_xxx */
-#include <net/neighbour.h>
-#include <net/netlink.h>
-#include <net/tcp.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-
-struct dn_zone
-{
- struct dn_zone *dz_next;
- struct dn_fib_node **dz_hash;
- int dz_nent;
- int dz_divisor;
- u32 dz_hashmask;
-#define DZ_HASHMASK(dz) ((dz)->dz_hashmask)
- int dz_order;
- __le16 dz_mask;
-#define DZ_MASK(dz) ((dz)->dz_mask)
-};
-
-struct dn_hash
-{
- struct dn_zone *dh_zones[17];
- struct dn_zone *dh_zone_list;
-};
-
-#define dz_key_0(key) ((key).datum = 0)
-
-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define endfor_nexthops(fi) }
-
-#define DN_MAX_DIVISOR 1024
-#define DN_S_ZOMBIE 1
-#define DN_S_ACCESSED 2
-
-#define DN_FIB_SCAN(f, fp) \
-for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
-
-#define DN_FIB_SCAN_KEY(f, fp, key) \
-for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
-
-#define RT_TABLE_MIN 1
-#define DN_FIB_TABLE_HASHSZ 256
-static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
-static DEFINE_RWLOCK(dn_fib_tables_lock);
-
-static struct kmem_cache *dn_hash_kmem __read_mostly;
-static int dn_fib_hash_zombies;
-
-static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
-{
- u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order);
- h ^= (h >> 10);
- h ^= (h >> 6);
- h &= DZ_HASHMASK(dz);
- return *(dn_fib_idx_t *)&h;
-}
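
The zone hash above selects a bucket from the top dz_order bits of the key,
folds in two right shifts, and masks the result down to the zone's bucket
count. Here is a userspace sketch with host-order keys (the kernel hashes the
le16-converted key.datum) and the 16-bucket layout that dn_new_zone() sets up
for a fresh zone; the example keys are illustrative.

#include <stdio.h>
#include <stdint.h>

static unsigned int zone_hash(uint16_t key, int order, uint32_t hashmask)
{
	uint16_t h = key >> (16 - order);	/* keep the top 'order' bits */

	h ^= (h >> 10);				/* fold */
	h ^= (h >> 6);
	return h & hashmask;			/* clamp to the bucket count */
}

int main(void)
{
	/* an 8-bit zone with 16 buckets (hashmask 0x0F) */
	printf("%u\n", zone_hash(0x0401, 8, 0x0F));	/* 4 */
	printf("%u\n", zone_hash(0x0501, 8, 0x0F));	/* 5 */
	return 0;
}
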
-
-static inline dn_fib_key_t dz_key(__le16 dst, struct dn_zone *dz)
-{
- dn_fib_key_t k;
- k.datum = dst & DZ_MASK(dz);
- return k;
-}
-
-static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz)
-{
- return &dz->dz_hash[dn_hash(key, dz).datum];
-}
-
-static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz)
-{
- return dz->dz_hash[dn_hash(key, dz).datum];
-}
-
-static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b)
-{
- return a.datum == b.datum;
-}
-
-static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b)
-{
- return a.datum <= b.datum;
-}
-
-static inline void dn_rebuild_zone(struct dn_zone *dz,
- struct dn_fib_node **old_ht,
- int old_divisor)
-{
- struct dn_fib_node *f, **fp, *next;
- int i;
-
- for(i = 0; i < old_divisor; i++) {
- for(f = old_ht[i]; f; f = next) {
- next = f->fn_next;
- for(fp = dn_chain_p(f->fn_key, dz);
- *fp && dn_key_leq((*fp)->fn_key, f->fn_key);
- fp = &(*fp)->fn_next)
- /* NOTHING */;
- f->fn_next = *fp;
- *fp = f;
- }
- }
-}
-
-static void dn_rehash_zone(struct dn_zone *dz)
-{
- struct dn_fib_node **ht, **old_ht;
- int old_divisor, new_divisor;
- u32 new_hashmask;
-
- old_divisor = dz->dz_divisor;
-
- switch (old_divisor) {
- case 16:
- new_divisor = 256;
- new_hashmask = 0xFF;
- break;
- default:
- printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
- old_divisor);
- fallthrough;
- case 256:
- new_divisor = 1024;
- new_hashmask = 0x3FF;
- break;
- }
-
- ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL);
- if (ht == NULL)
- return;
-
- write_lock_bh(&dn_fib_tables_lock);
- old_ht = dz->dz_hash;
- dz->dz_hash = ht;
- dz->dz_hashmask = new_hashmask;
- dz->dz_divisor = new_divisor;
- dn_rebuild_zone(dz, old_ht, old_divisor);
- write_unlock_bh(&dn_fib_tables_lock);
- kfree(old_ht);
-}
-
-static void dn_free_node(struct dn_fib_node *f)
-{
- dn_fib_release_info(DN_FIB_INFO(f));
- kmem_cache_free(dn_hash_kmem, f);
-}
-
-
-static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
-{
- int i;
- struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL);
- if (!dz)
- return NULL;
-
- if (z) {
- dz->dz_divisor = 16;
- dz->dz_hashmask = 0x0F;
- } else {
- dz->dz_divisor = 1;
- dz->dz_hashmask = 0;
- }
-
- dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL);
- if (!dz->dz_hash) {
- kfree(dz);
- return NULL;
- }
-
- dz->dz_order = z;
- dz->dz_mask = dnet_make_mask(z);
-
- for(i = z + 1; i <= 16; i++)
- if (table->dh_zones[i])
- break;
-
- write_lock_bh(&dn_fib_tables_lock);
- if (i>16) {
- dz->dz_next = table->dh_zone_list;
- table->dh_zone_list = dz;
- } else {
- dz->dz_next = table->dh_zones[i]->dz_next;
- table->dh_zones[i]->dz_next = dz;
- }
- table->dh_zones[z] = dz;
- write_unlock_bh(&dn_fib_tables_lock);
- return dz;
-}
-
-
-static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
-{
- struct rtnexthop *nhp;
- int nhlen;
-
- if (attrs[RTA_PRIORITY] &&
- nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
- return 1;
-
- if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
- if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
- (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw))
- return 0;
- return 1;
- }
-
- if (!attrs[RTA_MULTIPATH])
- return 0;
-
- nhp = nla_data(attrs[RTA_MULTIPATH]);
- nhlen = nla_len(attrs[RTA_MULTIPATH]);
-
- for_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- __le16 gw;
-
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
- return -EINVAL;
- if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
- return 1;
- if (attrlen) {
- struct nlattr *gw_attr;
-
- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
- gw = gw_attr ? nla_get_le16(gw_attr) : 0;
-
- if (gw && gw != nh->nh_gw)
- return 1;
- }
- nhp = RTNH_NEXT(nhp);
- } endfor_nexthops(fi);
-
- return 0;
-}
-
-static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
-{
- size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4) /* RTA_TABLE */
- + nla_total_size(2) /* RTA_DST */
- + nla_total_size(4) /* RTA_PRIORITY */
- + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
-
- /* space for nested metrics */
- payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
-
- if (fi->fib_nhs) {
- /* Also handles the special case fib_nhs == 1 */
-
- /* each nexthop is packed in an attribute */
- size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
-
- /* may contain a gateway attribute */
- nhsize += nla_total_size(4);
-
- /* all nexthops are packed in a nested attribute */
- payload += nla_total_size(fi->fib_nhs * nhsize);
- }
-
- return payload;
-}
-
-static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
- struct dn_fib_info *fi, unsigned int flags)
-{
- struct rtmsg *rtm;
- struct nlmsghdr *nlh;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
- if (!nlh)
- return -EMSGSIZE;
-
- rtm = nlmsg_data(nlh);
- rtm->rtm_family = AF_DECnet;
- rtm->rtm_dst_len = dst_len;
- rtm->rtm_src_len = 0;
- rtm->rtm_tos = 0;
- rtm->rtm_table = tb_id;
- rtm->rtm_flags = fi->fib_flags;
- rtm->rtm_scope = scope;
- rtm->rtm_type = type;
- rtm->rtm_protocol = fi->fib_protocol;
-
- if (nla_put_u32(skb, RTA_TABLE, tb_id) < 0)
- goto errout;
-
- if (rtm->rtm_dst_len &&
- nla_put(skb, RTA_DST, 2, dst) < 0)
- goto errout;
-
- if (fi->fib_priority &&
- nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority) < 0)
- goto errout;
-
- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
- goto errout;
-
- if (fi->fib_nhs == 1) {
- if (fi->fib_nh->nh_gw &&
- nla_put_le16(skb, RTA_GATEWAY, fi->fib_nh->nh_gw) < 0)
- goto errout;
-
- if (fi->fib_nh->nh_oif &&
- nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif) < 0)
- goto errout;
- }
-
- if (fi->fib_nhs > 1) {
- struct rtnexthop *nhp;
- struct nlattr *mp_head;
-
- mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH);
- if (!mp_head)
- goto errout;
-
- for_nexthops(fi) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp))))
- goto errout;
-
- nhp->rtnh_flags = nh->nh_flags & 0xFF;
- nhp->rtnh_hops = nh->nh_weight - 1;
- nhp->rtnh_ifindex = nh->nh_oif;
-
- if (nh->nh_gw &&
- nla_put_le16(skb, RTA_GATEWAY, nh->nh_gw) < 0)
- goto errout;
-
- nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
- } endfor_nexthops(fi);
-
- nla_nest_end(skb, mp_head);
- }
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
- struct nlmsghdr *nlh, struct netlink_skb_parms *req)
-{
- struct sk_buff *skb;
- u32 portid = req ? req->portid : 0;
- int err = -ENOBUFS;
-
- skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
- if (skb == NULL)
- goto errout;
-
- err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id,
- f->fn_type, f->fn_scope, &f->fn_key, z,
- DN_FIB_INFO(f), 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
- return;
-errout:
- if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
-}
-
-static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct dn_fib_table *tb,
- struct dn_zone *dz,
- struct dn_fib_node *f)
-{
- int i, s_i;
-
- s_i = cb->args[4];
- for(i = 0; f; i++, f = f->fn_next) {
- if (i < s_i)
- continue;
- if (f->fn_state & DN_S_ZOMBIE)
- continue;
- if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->n,
- (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
- f->fn_scope, &f->fn_key, dz->dz_order,
- f->fn_info, NLM_F_MULTI) < 0) {
- cb->args[4] = i;
- return -1;
- }
- }
- cb->args[4] = i;
- return skb->len;
-}
-
-static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct dn_fib_table *tb,
- struct dn_zone *dz)
-{
- int h, s_h;
-
- s_h = cb->args[3];
- for(h = 0; h < dz->dz_divisor; h++) {
- if (h < s_h)
- continue;
- if (h > s_h)
- memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
- if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
- continue;
- if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
- cb->args[3] = h;
- return -1;
- }
- }
- cb->args[3] = h;
- return skb->len;
-}
-
-static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- int m, s_m;
- struct dn_zone *dz;
- struct dn_hash *table = (struct dn_hash *)tb->data;
-
- s_m = cb->args[2];
- read_lock(&dn_fib_tables_lock);
- for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
- if (m < s_m)
- continue;
- if (m > s_m)
- memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
-
- if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
- cb->args[2] = m;
- read_unlock(&dn_fib_tables_lock);
- return -1;
- }
- }
- read_unlock(&dn_fib_tables_lock);
- cb->args[2] = m;
-
- return skb->len;
-}
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
- unsigned int e = 0, s_e;
- struct dn_fib_table *tb;
- int dumped = 0;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
- ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
- return dn_cache_dump(skb, cb);
-
- s_h = cb->args[0];
- s_e = cb->args[1];
-
- for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
- e = 0;
- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) {
- if (e < s_e)
- goto next;
- if (dumped)
- memset(&cb->args[2], 0, sizeof(cb->args) -
- 2 * sizeof(cb->args[0]));
- if (tb->dump(tb, skb, cb) < 0)
- goto out;
- dumped = 1;
-next:
- e++;
- }
- }
-out:
- cb->args[1] = e;
- cb->args[0] = h;
-
- return skb->len;
-}
-
-static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
- struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
- struct dn_hash *table = (struct dn_hash *)tb->data;
- struct dn_fib_node *new_f, *f, **fp, **del_fp;
- struct dn_zone *dz;
- struct dn_fib_info *fi;
- int z = r->rtm_dst_len;
- int type = r->rtm_type;
- dn_fib_key_t key;
- int err;
-
- if (z > 16)
- return -EINVAL;
-
- dz = table->dh_zones[z];
- if (!dz && !(dz = dn_new_zone(table, z)))
- return -ENOBUFS;
-
- dz_key_0(key);
- if (attrs[RTA_DST]) {
- __le16 dst = nla_get_le16(attrs[RTA_DST]);
- if (dst & ~DZ_MASK(dz))
- return -EINVAL;
- key = dz_key(dst, dz);
- }
-
- if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
- return err;
-
- if (dz->dz_nent > (dz->dz_divisor << 2) &&
- dz->dz_divisor > DN_MAX_DIVISOR &&
- (z==16 || (1<<z) > dz->dz_divisor))
- dn_rehash_zone(dz);
-
- fp = dn_chain_p(key, dz);
-
- DN_FIB_SCAN(f, fp) {
- if (dn_key_leq(key, f->fn_key))
- break;
- }
-
- del_fp = NULL;
-
- if (f && (f->fn_state & DN_S_ZOMBIE) &&
- dn_key_eq(f->fn_key, key)) {
- del_fp = fp;
- fp = &f->fn_next;
- f = *fp;
- goto create;
- }
-
- DN_FIB_SCAN_KEY(f, fp, key) {
- if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority)
- break;
- }
-
- if (f && dn_key_eq(f->fn_key, key) &&
- fi->fib_priority == DN_FIB_INFO(f)->fib_priority) {
- struct dn_fib_node **ins_fp;
-
- err = -EEXIST;
- if (n->nlmsg_flags & NLM_F_EXCL)
- goto out;
-
- if (n->nlmsg_flags & NLM_F_REPLACE) {
- del_fp = fp;
- fp = &f->fn_next;
- f = *fp;
- goto replace;
- }
-
- ins_fp = fp;
- err = -EEXIST;
-
- DN_FIB_SCAN_KEY(f, fp, key) {
- if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority)
- break;
- if (f->fn_type == type &&
- f->fn_scope == r->rtm_scope &&
- DN_FIB_INFO(f) == fi)
- goto out;
- }
-
- if (!(n->nlmsg_flags & NLM_F_APPEND)) {
- fp = ins_fp;
- f = *fp;
- }
- }
-
-create:
- err = -ENOENT;
- if (!(n->nlmsg_flags & NLM_F_CREATE))
- goto out;
-
-replace:
- err = -ENOBUFS;
- new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL);
- if (new_f == NULL)
- goto out;
-
- new_f->fn_key = key;
- new_f->fn_type = type;
- new_f->fn_scope = r->rtm_scope;
- DN_FIB_INFO(new_f) = fi;
-
- new_f->fn_next = f;
- write_lock_bh(&dn_fib_tables_lock);
- *fp = new_f;
- write_unlock_bh(&dn_fib_tables_lock);
- dz->dz_nent++;
-
- if (del_fp) {
- f = *del_fp;
- write_lock_bh(&dn_fib_tables_lock);
- *del_fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- if (!(f->fn_state & DN_S_ZOMBIE))
- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
- if (f->fn_state & DN_S_ACCESSED)
- dn_rt_cache_flush(-1);
- dn_free_node(f);
- dz->dz_nent--;
- } else {
- dn_rt_cache_flush(-1);
- }
-
- dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req);
-
- return 0;
-out:
- dn_fib_release_info(fi);
- return err;
-}
-
-
-static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
- struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
- struct dn_hash *table = (struct dn_hash*)tb->data;
- struct dn_fib_node **fp, **del_fp, *f;
- int z = r->rtm_dst_len;
- struct dn_zone *dz;
- dn_fib_key_t key;
- int matched;
-
-
- if (z > 16)
- return -EINVAL;
-
- if ((dz = table->dh_zones[z]) == NULL)
- return -ESRCH;
-
- dz_key_0(key);
- if (attrs[RTA_DST]) {
- __le16 dst = nla_get_le16(attrs[RTA_DST]);
- if (dst & ~DZ_MASK(dz))
- return -EINVAL;
- key = dz_key(dst, dz);
- }
-
- fp = dn_chain_p(key, dz);
-
- DN_FIB_SCAN(f, fp) {
- if (dn_key_eq(f->fn_key, key))
- break;
- if (dn_key_leq(key, f->fn_key))
- return -ESRCH;
- }
-
- matched = 0;
- del_fp = NULL;
- DN_FIB_SCAN_KEY(f, fp, key) {
- struct dn_fib_info *fi = DN_FIB_INFO(f);
-
- if (f->fn_state & DN_S_ZOMBIE)
- return -ESRCH;
-
- matched++;
-
- if (del_fp == NULL &&
- (!r->rtm_type || f->fn_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- dn_fib_nh_match(r, n, attrs, fi) == 0)
- del_fp = fp;
- }
-
- if (del_fp) {
- f = *del_fp;
- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
-
- if (matched != 1) {
- write_lock_bh(&dn_fib_tables_lock);
- *del_fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- if (f->fn_state & DN_S_ACCESSED)
- dn_rt_cache_flush(-1);
- dn_free_node(f);
- dz->dz_nent--;
- } else {
- f->fn_state |= DN_S_ZOMBIE;
- if (f->fn_state & DN_S_ACCESSED) {
- f->fn_state &= ~DN_S_ACCESSED;
- dn_rt_cache_flush(-1);
- }
- if (++dn_fib_hash_zombies > 128)
- dn_fib_flush();
- }
-
- return 0;
- }
-
- return -ESRCH;
-}
-
-static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table)
-{
- int found = 0;
- struct dn_fib_node *f;
-
- while((f = *fp) != NULL) {
- struct dn_fib_info *fi = DN_FIB_INFO(f);
-
- if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) {
- write_lock_bh(&dn_fib_tables_lock);
- *fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- dn_free_node(f);
- found++;
- continue;
- }
- fp = &f->fn_next;
- }
-
- return found;
-}
-
-static int dn_fib_table_flush(struct dn_fib_table *tb)
-{
- struct dn_hash *table = (struct dn_hash *)tb->data;
- struct dn_zone *dz;
- int found = 0;
-
- dn_fib_hash_zombies = 0;
- for(dz = table->dh_zone_list; dz; dz = dz->dz_next) {
- int i;
- int tmp = 0;
- for(i = dz->dz_divisor-1; i >= 0; i--)
- tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table);
- dz->dz_nent -= tmp;
- found += tmp;
- }
-
- return found;
-}
-
-static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res)
-{
- int err;
- struct dn_zone *dz;
- struct dn_hash *t = (struct dn_hash *)tb->data;
-
- read_lock(&dn_fib_tables_lock);
- for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
- struct dn_fib_node *f;
- dn_fib_key_t k = dz_key(flp->daddr, dz);
-
- for(f = dz_chain(k, dz); f; f = f->fn_next) {
- if (!dn_key_eq(k, f->fn_key)) {
- if (dn_key_leq(k, f->fn_key))
- break;
- else
- continue;
- }
-
- f->fn_state |= DN_S_ACCESSED;
-
- if (f->fn_state&DN_S_ZOMBIE)
- continue;
-
- if (f->fn_scope < flp->flowidn_scope)
- continue;
-
- err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
-
- if (err == 0) {
- res->type = f->fn_type;
- res->scope = f->fn_scope;
- res->prefixlen = dz->dz_order;
- goto out;
- }
- if (err < 0)
- goto out;
- }
- }
- err = 1;
-out:
- read_unlock(&dn_fib_tables_lock);
- return err;
-}
-
-
-struct dn_fib_table *dn_fib_get_table(u32 n, int create)
-{
- struct dn_fib_table *t;
- unsigned int h;
-
- if (n < RT_TABLE_MIN)
- return NULL;
-
- if (n > RT_TABLE_MAX)
- return NULL;
-
- h = n & (DN_FIB_TABLE_HASHSZ - 1);
- rcu_read_lock();
- hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) {
- if (t->n == n) {
- rcu_read_unlock();
- return t;
- }
- }
- rcu_read_unlock();
-
- if (!create)
- return NULL;
-
- if (in_interrupt()) {
- net_dbg_ratelimited("DECnet: BUG! Attempt to create routing table from interrupt\n");
- return NULL;
- }
-
- t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash),
- GFP_KERNEL);
- if (t == NULL)
- return NULL;
-
- t->n = n;
- t->insert = dn_fib_table_insert;
- t->delete = dn_fib_table_delete;
- t->lookup = dn_fib_table_lookup;
- t->flush = dn_fib_table_flush;
- t->dump = dn_fib_table_dump;
- hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
-
- return t;
-}
-
-struct dn_fib_table *dn_fib_empty_table(void)
-{
- u32 id;
-
- for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
- if (dn_fib_get_table(id, 0) == NULL)
- return dn_fib_get_table(id, 1);
- return NULL;
-}
-
-void dn_fib_flush(void)
-{
- int flushed = 0;
- struct dn_fib_table *tb;
- unsigned int h;
-
- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist)
- flushed += tb->flush(tb);
- }
-
- if (flushed)
- dn_rt_cache_flush(-1);
-}
-
-void __init dn_fib_table_init(void)
-{
- dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
- sizeof(struct dn_fib_info),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
-}
-
-void __exit dn_fib_table_cleanup(void)
-{
- struct dn_fib_table *t;
- struct hlist_node *next;
- unsigned int h;
-
- write_lock(&dn_fib_tables_lock);
- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h],
- hlist) {
- hlist_del(&t->hlist);
- kfree(t);
- }
- }
- write_unlock(&dn_fib_tables_lock);
-}
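
[editor's note] The removed dump helpers above follow the standard netlink dump-resumption pattern: each nesting level (table, zone, bucket) parks its cursor in cb->args[] so the next dump call can continue where the previous, size-limited skb stopped. A minimal standalone model of that cursor scheme, with hypothetical names and no kernel APIs, just to show the mechanism:

    /* Toy model of netlink dump resumption: args[] persists across
     * calls, like cb->args[] in the removed dn_fib_dump() chain. */
    #include <stdio.h>

    #define BUCKETS  4
    #define ENTRIES  3
    #define PER_CALL 5              /* pretend each skb holds 5 records */

    static int dump_once(long args[2])
    {
            int emitted = 0;

            for (int b = args[0]; b < BUCKETS; b++, args[1] = 0) {
                    for (int e = args[1]; e < ENTRIES; e++) {
                            if (emitted == PER_CALL) {
                                    args[0] = b;    /* save cursor */
                                    args[1] = e;
                                    return emitted;
                            }
                            printf("record %d.%d\n", b, e);
                            emitted++;
                    }
            }
            args[0] = BUCKETS;      /* fully dumped */
            return emitted;
    }

    int main(void)
    {
            long args[2] = { 0, 0 };

            while (dump_once(args) == PER_CALL)
                    printf("-- skb full, resuming at %ld.%ld --\n",
                           args[0], args[1]);
            return 0;
    }
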
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
deleted file mode 100644
index aa4155875ca8..000000000000
--- a/net/decnet/dn_timer.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Socket Timer Functions
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse : Made keepalive timer part of the same
- * timer idea.
- * Steve Whitehouse : Added checks for sk->sock_readers
- * David S. Miller : New socket locking
- * Steve Whitehouse : Timer grabs socket ref.
- */
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <net/sock.h>
-#include <linux/atomic.h>
-#include <linux/jiffies.h>
-#include <net/flow.h>
-#include <net/dn.h>
-
-/*
- * Slow timer is for everything else (n * 500mS)
- */
-
-#define SLOW_INTERVAL (HZ/2)
-
-static void dn_slow_timer(struct timer_list *t);
-
-void dn_start_slow_timer(struct sock *sk)
-{
- timer_setup(&sk->sk_timer, dn_slow_timer, 0);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
-}
-
-void dn_stop_slow_timer(struct sock *sk)
-{
- sk_stop_timer(sk, &sk->sk_timer);
-}
-
-static void dn_slow_timer(struct timer_list *t)
-{
- struct sock *sk = from_timer(sk, t, sk_timer);
- struct dn_scp *scp = DN_SK(sk);
-
- bh_lock_sock(sk);
-
- if (sock_owned_by_user(sk)) {
- sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10);
- goto out;
- }
-
- /*
- * The persist timer is the standard slow timer used for retransmits
- * in both connection establishment and disconnection as well as
- * in the RUN state. The different states are catered for by changing
- * the function pointer in the socket. Setting the timer to a value
- * of zero turns it off. We allow the persist_fxn to turn the
- * timer off in a permanent way by returning non-zero, so that
- * timer based routines may remove sockets. This is why we have a
- * sock_hold()/sock_put() around the timer to prevent the socket
- * going away in the middle.
- */
- if (scp->persist && scp->persist_fxn) {
- if (scp->persist <= SLOW_INTERVAL) {
- scp->persist = 0;
-
- if (scp->persist_fxn(sk))
- goto out;
- } else {
- scp->persist -= SLOW_INTERVAL;
- }
- }
-
- /*
- * Check for keepalive timeout. After the other timer 'cos if
- * the previous timer caused a retransmit, we don't need to
- * do this. scp->stamp is the last time that we sent a packet.
- * The keepalive function sends a link service packet to the
- * other end. If it remains unacknowledged, the standard
- * socket timers will eventually shut the socket down. Each
- * time we do this, scp->stamp will be updated, thus
- * we won't try and send another until scp->keepalive has passed
- * since the last successful transmission.
- */
- if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) {
- if (time_after_eq(jiffies, scp->stamp + scp->keepalive))
- scp->keepalive_fxn(sk);
- }
-
- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
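
[editor's note] The comments in dn_slow_timer() above describe two mechanisms driven from one periodic tick: a persist countdown that fires a state-specific retransmit function, and a keepalive based on the time of the last transmission. A standalone sketch of those two checks, with plain types standing in for jiffies and the socket timer (hypothetical names, not kernel code):

    #include <stdio.h>
    #include <stdbool.h>

    #define SLOW_INTERVAL 500               /* ms, mirroring HZ/2 */

    struct conn {
            unsigned long now;              /* stand-in for jiffies (ms) */
            unsigned long persist;          /* ms until retransmit action */
            unsigned long stamp;            /* time of last transmission */
            unsigned long keepalive;        /* keepalive interval, 0 = off */
    };

    /* return true to stop rearming, like persist_fxn returning non-zero */
    static bool slow_tick(struct conn *c)
    {
            if (c->persist) {
                    if (c->persist <= SLOW_INTERVAL) {
                            c->persist = 0;
                            printf("t=%lu: persist expired, retransmit\n", c->now);
                    } else {
                            c->persist -= SLOW_INTERVAL;
                    }
            }

            if (c->keepalive && c->now >= c->stamp + c->keepalive) {
                    printf("t=%lu: idle, send keepalive\n", c->now);
                    c->stamp = c->now;      /* sending updates the stamp */
            }
            return false;
    }

    int main(void)
    {
            struct conn c = { .persist = 1200, .keepalive = 3000 };

            for (int i = 1; i <= 8; i++) {
                    c.now = i * SLOW_INTERVAL;
                    if (slow_tick(&c))
                            break;
            }
            return 0;
    }
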
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
deleted file mode 100644
index 14ec4ef95fab..000000000000
--- a/net/decnet/netfilter/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# DECnet netfilter configuration
-#
-
-menu "DECnet: Netfilter Configuration"
- depends on DECNET && NETFILTER
- depends on NETFILTER_ADVANCED
-
-config DECNET_NF_GRABULATOR
- tristate "Routing message grabulator (for userland routing daemon)"
- help
- Enable this module if you want to use the userland DECnet routing
- daemon. You will also need to enable routing support for DECnet
- unless you just want to monitor routing messages from other nodes.
-
-endmenu
diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile
deleted file mode 100644
index 429c84289d0f..000000000000
--- a/net/decnet/netfilter/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for DECnet netfilter modules
-#
-
-obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
deleted file mode 100644
index 26a9193df783..000000000000
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ /dev/null
@@ -1,158 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Message Grabulator
- *
- * (C) 2000 ChyGwyn Limited - https://www.chygwyn.com/
- *
- * Author: Steven Whitehouse <steve@chygwyn.com>
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/spinlock.h>
-#include <net/netlink.h>
-#include <linux/netfilter_decnet.h>
-
-#include <net/sock.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-
-static struct sock *dnrmg = NULL;
-
-
-static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
-{
- struct sk_buff *skb = NULL;
- size_t size;
- sk_buff_data_t old_tail;
- struct nlmsghdr *nlh;
- unsigned char *ptr;
- struct nf_dn_rtmsg *rtm;
-
- size = NLMSG_ALIGN(rt_skb->len) +
- NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
- skb = nlmsg_new(size, GFP_ATOMIC);
- if (!skb) {
- *errp = -ENOMEM;
- return NULL;
- }
- old_tail = skb->tail;
- nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
- if (!nlh) {
- kfree_skb(skb);
- *errp = -ENOMEM;
- return NULL;
- }
- rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
- rtm->nfdn_ifindex = rt_skb->dev->ifindex;
- ptr = NFDN_RTMSG(rtm);
- skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
- nlh->nlmsg_len = skb->tail - old_tail;
- return skb;
-}
-
-static void dnrmg_send_peer(struct sk_buff *skb)
-{
- struct sk_buff *skb2;
- int status = 0;
- int group = 0;
- unsigned char flags = *skb->data;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_L1RT:
- group = DNRNG_NLGRP_L1;
- break;
- case DN_RT_PKT_L2RT:
- group = DNRNG_NLGRP_L2;
- break;
- default:
- return;
- }
-
- skb2 = dnrmg_build_message(skb, &status);
- if (skb2 == NULL)
- return;
- NETLINK_CB(skb2).dst_group = group;
- netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
-}
-
-
-static unsigned int dnrmg_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- dnrmg_send_peer(skb);
- return NF_ACCEPT;
-}
-
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0)
-
-static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
-{
- struct nlmsghdr *nlh = nlmsg_hdr(skb);
-
- if (skb->len < sizeof(*nlh) ||
- nlh->nlmsg_len < sizeof(*nlh) ||
- skb->len < nlh->nlmsg_len)
- return;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- RCV_SKB_FAIL(-EPERM);
-
- /* Eventually we might send routing messages too */
-
- RCV_SKB_FAIL(-EINVAL);
-}
-
-static const struct nf_hook_ops dnrmg_ops = {
- .hook = dnrmg_hook,
- .pf = NFPROTO_DECNET,
- .hooknum = NF_DN_ROUTE,
- .priority = NF_DN_PRI_DNRTMSG,
-};
-
-static int __init dn_rtmsg_init(void)
-{
- int rv = 0;
- struct netlink_kernel_cfg cfg = {
- .groups = DNRNG_NLGRP_MAX,
- .input = dnrmg_receive_user_skb,
- };
-
- dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg);
- if (dnrmg == NULL) {
- printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
- return -ENOMEM;
- }
-
- rv = nf_register_net_hook(&init_net, &dnrmg_ops);
- if (rv) {
- netlink_kernel_release(dnrmg);
- }
-
- return rv;
-}
-
-static void __exit dn_rtmsg_fini(void)
-{
- nf_unregister_net_hook(&init_net, &dnrmg_ops);
- netlink_kernel_release(dnrmg);
-}
-
-
-MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
-MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
-
-module_init(dn_rtmsg_init);
-module_exit(dn_rtmsg_fini);
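
[editor's note] dnrmg_build_message() above sized its skb as NLMSG_ALIGN(payload) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); NLMSG_ALIGN() simply rounds up to the 4-byte netlink alignment. A quick userspace check of that arithmetic (Linux only, uses the uapi netlink header; the 4-byte header size assumes nf_dn_rtmsg's single __u32 ifindex field):

    #include <stdio.h>
    #include <linux/netlink.h>

    int main(void)
    {
            for (unsigned int len = 1; len <= 9; len++)
                    printf("NLMSG_ALIGN(%u) = %u\n", len, NLMSG_ALIGN(len));

            /* a 7-byte routing packet plus a 4-byte nf_dn_rtmsg-style header
             * reserves NLMSG_ALIGN(7) + NLMSG_ALIGN(4) = 8 + 4 = 12 bytes */
            return 0;
    }
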
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
deleted file mode 100644
index 67b5ab2657b7..000000000000
--- a/net/decnet/sysctl_net_decnet.c
+++ /dev/null
@@ -1,362 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet sysctl support functions
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse - C99 changes and default device handling
- * Steve Whitehouse - Memory buffer settings, like the tcp ones
- *
- */
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/fs.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-
-#include <linux/uaccess.h>
-
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-
-int decnet_debug_level;
-int decnet_time_wait = 30;
-int decnet_dn_count = 1;
-int decnet_di_count = 3;
-int decnet_dr_count = 3;
-int decnet_log_martians = 1;
-int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
-
-/* Reasonable defaults, I hope, based on tcp's defaults */
-long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
-int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
-int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
-
-#ifdef CONFIG_SYSCTL
-extern int decnet_dst_gc_interval;
-static int min_decnet_time_wait[] = { 5 };
-static int max_decnet_time_wait[] = { 600 };
-static int min_state_count[] = { 1 };
-static int max_state_count[] = { NSP_MAXRXTSHIFT };
-static int min_decnet_dst_gc_interval[] = { 1 };
-static int max_decnet_dst_gc_interval[] = { 60 };
-static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW };
-static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW };
-static char node_name[7] = "???";
-
-static struct ctl_table_header *dn_table_header = NULL;
-
-/*
- * ctype.h :-)
- */
-#define ISNUM(x) (((x) >= '0') && ((x) <= '9'))
-#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z'))
-#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z'))
-#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x))
-#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x))
-
-static void strip_it(char *str)
-{
- for(;;) {
- switch (*str) {
- case ' ':
- case '\n':
- case '\r':
- case ':':
- *str = 0;
- fallthrough;
- case 0:
- return;
- }
- str++;
- }
-}
-
-/*
- * Simple routine to parse an ascii DECnet address
- * into a network order address.
- */
-static int parse_addr(__le16 *addr, char *str)
-{
- __u16 area, node;
-
- while(*str && !ISNUM(*str)) str++;
-
- if (*str == 0)
- return -1;
-
- area = (*str++ - '0');
- if (ISNUM(*str)) {
- area *= 10;
- area += (*str++ - '0');
- }
-
- if (*str++ != '.')
- return -1;
-
- if (!ISNUM(*str))
- return -1;
-
- node = *str++ - '0';
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
-
- if ((node > 1023) || (area > 63))
- return -1;
-
- if (INVALID_END_CHAR(*str))
- return -1;
-
- *addr = cpu_to_le16((area << 10) | node);
-
- return 0;
-}
-
-static int dn_node_address_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- char addr[DN_ASCBUF_LEN];
- size_t len;
- __le16 dnaddr;
-
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
- memcpy(addr, buffer, len);
- addr[len] = 0;
- strip_it(addr);
-
- if (parse_addr(&dnaddr, addr))
- return -EINVAL;
-
- dn_dev_devices_off();
-
- decnet_address = dnaddr;
-
- dn_dev_devices_on();
-
- *ppos += len;
-
- return 0;
- }
-
- dn_addr2asc(le16_to_cpu(decnet_address), addr);
- len = strlen(addr);
- addr[len++] = '\n';
-
- if (len > *lenp)
- len = *lenp;
- memcpy(buffer, addr, len);
- *lenp = len;
- *ppos += len;
-
- return 0;
-}
-
-static int dn_def_dev_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- size_t len;
- struct net_device *dev;
- char devname[17];
-
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- if (*lenp > 16)
- return -E2BIG;
-
- memcpy(devname, buffer, *lenp);
- devname[*lenp] = 0;
- strip_it(devname);
-
- dev = dev_get_by_name(&init_net, devname);
- if (dev == NULL)
- return -ENODEV;
-
- if (dev->dn_ptr == NULL) {
- dev_put(dev);
- return -ENODEV;
- }
-
- if (dn_dev_set_default(dev, 1)) {
- dev_put(dev);
- return -ENODEV;
- }
- *ppos += *lenp;
-
- return 0;
- }
-
- dev = dn_dev_get_default();
- if (dev == NULL) {
- *lenp = 0;
- return 0;
- }
-
- strcpy(devname, dev->name);
- dev_put(dev);
- len = strlen(devname);
- devname[len++] = '\n';
-
- if (len > *lenp) len = *lenp;
-
- memcpy(buffer, devname, len);
- *lenp = len;
- *ppos += len;
-
- return 0;
-}
-
-static struct ctl_table dn_table[] = {
- {
- .procname = "node_address",
- .maxlen = 7,
- .mode = 0644,
- .proc_handler = dn_node_address_handler,
- },
- {
- .procname = "node_name",
- .data = node_name,
- .maxlen = 7,
- .mode = 0644,
- .proc_handler = proc_dostring,
- },
- {
- .procname = "default_device",
- .maxlen = 16,
- .mode = 0644,
- .proc_handler = dn_def_dev_handler,
- },
- {
- .procname = "time_wait",
- .data = &decnet_time_wait,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_time_wait,
- .extra2 = &max_decnet_time_wait
- },
- {
- .procname = "dn_count",
- .data = &decnet_dn_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "di_count",
- .data = &decnet_di_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "dr_count",
- .data = &decnet_dr_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "dst_gc_interval",
- .data = &decnet_dst_gc_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_dst_gc_interval,
- .extra2 = &max_decnet_dst_gc_interval
- },
- {
- .procname = "no_fc_max_cwnd",
- .data = &decnet_no_fc_max_cwnd,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_no_fc_max_cwnd,
- .extra2 = &max_decnet_no_fc_max_cwnd
- },
- {
- .procname = "decnet_mem",
- .data = &sysctl_decnet_mem,
- .maxlen = sizeof(sysctl_decnet_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
- .procname = "decnet_rmem",
- .data = &sysctl_decnet_rmem,
- .maxlen = sizeof(sysctl_decnet_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "decnet_wmem",
- .data = &sysctl_decnet_wmem,
- .maxlen = sizeof(sysctl_decnet_wmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "debug",
- .data = &decnet_debug_level,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-
-void dn_register_sysctl(void)
-{
- dn_table_header = register_net_sysctl(&init_net, "net/decnet", dn_table);
-}
-
-void dn_unregister_sysctl(void)
-{
- unregister_net_sysctl_table(dn_table_header);
-}
-
-#else /* CONFIG_SYSCTL */
-void dn_unregister_sysctl(void)
-{
-}
-void dn_register_sysctl(void)
-{
-}
-
-#endif
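
[editor's note] parse_addr() above accepts "area.node" strings with area <= 63 and node <= 1023 and packs them as (area << 10) | node, stored little-endian. A host-order sketch of the same packing, with hypothetical helper names:

    #include <stdio.h>

    static unsigned int dn_pack(unsigned int area, unsigned int node)
    {
            return (area << 10) | node;     /* 6 bits of area, 10 of node */
    }

    int main(void)
    {
            unsigned int a = dn_pack(1, 13);        /* "1.13" */

            printf("1.13 -> 0x%04x (area %u, node %u)\n",
                   a, a >> 10, a & 0x3ff);          /* 0x040d, area 1, node 13 */
            return 0;
    }
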
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index cac48a741f27..ed56c7a554b8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -469,10 +469,16 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
break;
case DSA_PORT_TYPE_CPU:
- err = dsa_port_link_register_of(dp);
- if (err)
- break;
- dsa_port_link_registered = true;
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for CPU port %d\n",
+ dp->index);
+ }
err = dsa_port_enable(dp, NULL);
if (err)
@@ -481,10 +487,16 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_DSA:
- err = dsa_port_link_register_of(dp);
- if (err)
- break;
- dsa_port_link_registered = true;
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for DSA port %d\n",
+ dp->index);
+ }
err = dsa_port_enable(dp, NULL);
if (err)
@@ -505,7 +517,7 @@ static int dsa_port_setup(struct dsa_port *dp)
if (err && dsa_port_enabled)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
- dsa_port_link_unregister_of(dp);
+ dsa_shared_port_link_unregister_of(dp);
if (err) {
if (ds->ops->port_teardown)
ds->ops->port_teardown(ds, dp->index);
@@ -577,11 +589,13 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
dsa_port_disable(dp);
- dsa_port_link_unregister_of(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_DSA:
dsa_port_disable(dp);
- dsa_port_link_unregister_of(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
if (dp->slave) {
@@ -1046,26 +1060,24 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *dp;
+ struct dsa_port *cpu_dp;
int err = 0;
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_cpu(dp)) {
- struct net_device *master = dp->master;
- bool admin_up = (master->flags & IFF_UP) &&
- !qdisc_tx_is_noop(master);
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+ bool admin_up = (master->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(master);
- err = dsa_master_setup(master, dp);
- if (err)
- break;
+ err = dsa_master_setup(master, cpu_dp);
+ if (err)
+ break;
- /* Replay master state event */
- dsa_tree_master_admin_state_change(dst, master, admin_up);
- dsa_tree_master_oper_state_change(dst, master,
- netif_oper_up(master));
- }
+ /* Replay master state event */
+ dsa_tree_master_admin_state_change(dst, master, admin_up);
+ dsa_tree_master_oper_state_change(dst, master,
+ netif_oper_up(master));
}
rtnl_unlock();
@@ -1075,22 +1087,20 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *dp;
+ struct dsa_port *cpu_dp;
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_cpu(dp)) {
- struct net_device *master = dp->master;
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
- /* Synthesizing an "admin down" state is sufficient for
- * the switches to get a notification if the master is
- * currently up and running.
- */
- dsa_tree_master_admin_state_change(dst, master, false);
+ /* Synthesizing an "admin down" state is sufficient for
+ * the switches to get a notification if the master is
+ * currently up and running.
+ */
+ dsa_tree_master_admin_state_change(dst, master, false);
- dsa_master_teardown(master);
- }
+ dsa_master_teardown(master);
}
rtnl_unlock();
@@ -1238,7 +1248,6 @@ out_disconnect:
* they would have formed disjoint trees (different "dsa,member" values).
*/
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops)
{
@@ -1254,14 +1263,11 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
* attempts to change the tagging protocol. If we ever lift the IFF_UP
* restriction, there needs to be another mutex which serializes this.
*/
- if (master->flags & IFF_UP)
- goto out_unlock;
-
list_for_each_entry(dp, &dst->ports, list) {
- if (!dsa_port_is_user(dp))
- continue;
+ if (dsa_port_is_cpu(dp) && (dp->master->flags & IFF_UP))
+ goto out_unlock;
- if (dp->slave->flags & IFF_UP)
+ if (dsa_port_is_user(dp) && (dp->slave->flags & IFF_UP))
goto out_unlock;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d9722e49864b..614fbba8fe39 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -285,8 +285,8 @@ int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
const struct switchdev_obj_ring_role_mrp *mrp);
int dsa_port_phylink_create(struct dsa_port *dp);
-int dsa_port_link_register_of(struct dsa_port *dp);
-void dsa_port_link_unregister_of(struct dsa_port *dp);
+int dsa_shared_port_link_register_of(struct dsa_port *dp);
+void dsa_shared_port_link_unregister_of(struct dsa_port *dp);
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
@@ -545,7 +545,6 @@ struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
int dsa_broadcast(unsigned long e, void *v);
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 2851e44c4cf0..fb810edc8281 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -58,7 +58,7 @@ static void dsa_master_get_regs(struct net_device *dev,
}
cpu_info = (struct ethtool_drvinfo *)data;
- strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver));
+ strscpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver));
data += sizeof(*cpu_info);
cpu_regs = (struct ethtool_regs *)data;
data += sizeof(*cpu_regs);
@@ -307,7 +307,7 @@ static ssize_t tagging_store(struct device *d, struct device_attribute *attr,
*/
goto out;
- err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, dev, new_tag_ops,
+ err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops,
old_tag_ops);
if (err) {
/* On failure the old tagger is restored, so we don't need the
diff --git a/net/dsa/port.c b/net/dsa/port.c
index a8895ee3cd60..7afc35db0c29 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1555,7 +1555,7 @@ int dsa_port_phylink_create(struct dsa_port *dp)
return 0;
}
-static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable)
{
struct dsa_switch *ds = dp->ds;
struct phy_device *phydev;
@@ -1593,7 +1593,7 @@ err_put_dev:
return err;
}
-static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp)
{
struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
@@ -1627,7 +1627,7 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
return 0;
}
-static int dsa_port_phylink_register(struct dsa_port *dp)
+static int dsa_shared_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
struct device_node *port_dn = dp->dn;
@@ -1653,22 +1653,184 @@ err_phy_connect:
return err;
}
-int dsa_port_link_register_of(struct dsa_port *dp)
+/* During the initial DSA driver migration to OF, port nodes were sometimes
+ * added to device trees with no indication of how they should operate from a
+ * link management perspective (phy-handle, fixed-link, etc). Additionally, the
+ * phy-mode may be absent. The interpretation of these port OF nodes depends on
+ * their type.
+ *
+ * User ports with no phy-handle or fixed-link are expected to connect to an
+ * internal PHY located on the ds->slave_mii_bus at an MDIO address equal to
+ * the port number. This description is still actively supported.
+ *
+ * Shared (CPU and DSA) ports with no phy-handle or fixed-link are expected to
+ * operate at the maximum speed that their phy-mode is capable of. If the
+ * phy-mode is absent, they are expected to operate using the phy-mode
+ * supported by the port that gives the highest link speed. It is unspecified
+ * if the port should use flow control or not, half duplex or full duplex, or
+ * if the phy-mode is a SERDES link, whether in-band autoneg is expected to be
+ * enabled or not.
+ *
+ * In the latter case of shared ports, omitting the link management description
+ * from the firmware node is deprecated and strongly discouraged. DSA uses
+ * phylink, which rejects the firmware nodes of these ports for lacking
+ * required properties.
+ *
+ * For switches in this table, DSA will skip enforcing validation and will
+ * later omit registering a phylink instance for the shared ports, if they lack
+ * a fixed-link, a phy-handle, or a managed = "in-band-status" property.
+ * It becomes the responsibility of the driver to ensure that these ports
+ * operate at the maximum speed (whatever this means) and will interoperate
+ * with the DSA master or other cascade port, since phylink methods will not be
+ * invoked for them.
+ *
+ * If you are considering expanding this table for newly introduced switches,
+ * think again. It is OK to remove switches from this table if there aren't DT
+ * blobs in circulation which rely on defaulting the shared ports.
+ */
+static const char * const dsa_switches_apply_workarounds[] = {
+#if IS_ENABLED(CONFIG_NET_DSA_XRS700X)
+ "arrow,xrs7003e",
+ "arrow,xrs7003f",
+ "arrow,xrs7004e",
+ "arrow,xrs7004f",
+#endif
+#if IS_ENABLED(CONFIG_B53)
+ "brcm,bcm5325",
+ "brcm,bcm53115",
+ "brcm,bcm53125",
+ "brcm,bcm53128",
+ "brcm,bcm5365",
+ "brcm,bcm5389",
+ "brcm,bcm5395",
+ "brcm,bcm5397",
+ "brcm,bcm5398",
+ "brcm,bcm53010-srab",
+ "brcm,bcm53011-srab",
+ "brcm,bcm53012-srab",
+ "brcm,bcm53018-srab",
+ "brcm,bcm53019-srab",
+ "brcm,bcm5301x-srab",
+ "brcm,bcm11360-srab",
+ "brcm,bcm58522-srab",
+ "brcm,bcm58525-srab",
+ "brcm,bcm58535-srab",
+ "brcm,bcm58622-srab",
+ "brcm,bcm58623-srab",
+ "brcm,bcm58625-srab",
+ "brcm,bcm88312-srab",
+ "brcm,cygnus-srab",
+ "brcm,nsp-srab",
+ "brcm,omega-srab",
+ "brcm,bcm3384-switch",
+ "brcm,bcm6328-switch",
+ "brcm,bcm6368-switch",
+ "brcm,bcm63xx-switch",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_BCM_SF2)
+ "brcm,bcm7445-switch-v4.0",
+ "brcm,bcm7278-switch-v4.0",
+ "brcm,bcm7278-switch-v4.8",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_LANTIQ_GSWIP)
+ "lantiq,xrx200-gswip",
+ "lantiq,xrx300-gswip",
+ "lantiq,xrx330-gswip",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MV88E6060)
+ "marvell,mv88e6060",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MV88E6XXX)
+ "marvell,mv88e6085",
+ "marvell,mv88e6190",
+ "marvell,mv88e6250",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON)
+ "microchip,ksz8765",
+ "microchip,ksz8794",
+ "microchip,ksz8795",
+ "microchip,ksz8863",
+ "microchip,ksz8873",
+ "microchip,ksz9477",
+ "microchip,ksz9897",
+ "microchip,ksz9893",
+ "microchip,ksz9563",
+ "microchip,ksz8563",
+ "microchip,ksz9567",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_SMSC_LAN9303_MDIO)
+ "smsc,lan9303-mdio",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_SMSC_LAN9303_I2C)
+ "smsc,lan9303-i2c",
+#endif
+ NULL,
+};
+
+static void dsa_shared_port_validate_of(struct dsa_port *dp,
+ bool *missing_phy_mode,
+ bool *missing_link_description)
{
+ struct device_node *dn = dp->dn, *phy_np;
struct dsa_switch *ds = dp->ds;
- struct device_node *phy_np;
+ phy_interface_t mode;
+
+ *missing_phy_mode = false;
+ *missing_link_description = false;
+
+ if (of_get_phy_mode(dn, &mode)) {
+ *missing_phy_mode = true;
+ dev_err(ds->dev,
+ "OF node %pOF of %s port %d lacks the required \"phy-mode\" property\n",
+ dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ }
+
+ /* Note: of_phy_is_fixed_link() also returns true for
+ * managed = "in-band-status"
+ */
+ if (of_phy_is_fixed_link(dn))
+ return;
+
+ phy_np = of_parse_phandle(dn, "phy-handle", 0);
+ if (phy_np) {
+ of_node_put(phy_np);
+ return;
+ }
+
+ *missing_link_description = true;
+
+ dev_err(ds->dev,
+ "OF node %pOF of %s port %d lacks the required \"phy-handle\", \"fixed-link\" or \"managed\" properties\n",
+ dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+}
+
+int dsa_shared_port_link_register_of(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ bool missing_link_description;
+ bool missing_phy_mode;
int port = dp->index;
+ dsa_shared_port_validate_of(dp, &missing_phy_mode,
+ &missing_link_description);
+
+ if ((missing_phy_mode || missing_link_description) &&
+ !of_device_compatible_match(ds->dev->of_node,
+ dsa_switches_apply_workarounds))
+ return -EINVAL;
+
if (!ds->ops->adjust_link) {
- phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (of_phy_is_fixed_link(dp->dn) || phy_np) {
+ if (missing_link_description) {
+ dev_warn(ds->dev,
+ "Skipping phylink registration for %s port %d\n",
+ dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ } else {
if (ds->ops->phylink_mac_link_down)
ds->ops->phylink_mac_link_down(ds, port,
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
- of_node_put(phy_np);
- return dsa_port_phylink_register(dp);
+
+ return dsa_shared_port_phylink_register(dp);
}
- of_node_put(phy_np);
return 0;
}
@@ -1676,12 +1838,12 @@ int dsa_port_link_register_of(struct dsa_port *dp)
"Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
if (of_phy_is_fixed_link(dp->dn))
- return dsa_port_fixed_link_register_of(dp);
+ return dsa_shared_port_fixed_link_register_of(dp);
else
- return dsa_port_setup_phy_of(dp, true);
+ return dsa_shared_port_setup_phy_of(dp, true);
}
-void dsa_port_link_unregister_of(struct dsa_port *dp)
+void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
@@ -1697,7 +1859,7 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
if (of_phy_is_fixed_link(dp->dn))
of_phy_deregister_fixed_link(dp->dn);
else
- dsa_port_setup_phy_of(dp, false);
+ dsa_shared_port_setup_phy_of(dp, false);
}
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
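
[editor's note] The policy described in the comment block above reduces to: a CPU or DSA port whose firmware node lacks phy-mode or a link description is only tolerated if the switch's compatible string appears on the legacy allowlist. A plain-C sketch of that decision; the list here is a small subset and compatible_in_list() is a stand-in for the kernel's of_device_compatible_match() on the OF node:

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    static const char * const legacy_switches[] = {
            "marvell,mv88e6085",
            "brcm,bcm7445-switch-v4.0",
            NULL,
    };

    static bool compatible_in_list(const char *compat, const char * const *list)
    {
            for (; *list; list++)
                    if (!strcmp(compat, *list))
                            return true;
            return false;
    }

    static int validate_shared_port(const char *compat, bool missing_phy_mode,
                                    bool missing_link_description)
    {
            if ((missing_phy_mode || missing_link_description) &&
                !compatible_in_list(compat, legacy_switches))
                    return -22;     /* -EINVAL: new bindings must be complete */
            return 0;               /* complete, or grandfathered in */
    }

    int main(void)
    {
            printf("%d\n", validate_shared_port("marvell,mv88e6085", true, true));
            printf("%d\n", validate_shared_port("vendor,new-switch", false, true));
            return 0;
    }
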
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1291c2431d44..345106b1ed78 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -826,9 +826,9 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
static void dsa_slave_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
- strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
- strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
- strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
+ strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
+ strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
+ strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
static int dsa_slave_get_regs_len(struct net_device *dev)
@@ -2476,6 +2476,9 @@ static int dsa_slave_changeupper(struct net_device *dev,
struct netlink_ext_ack *extack;
int err = NOTIFY_DONE;
+ if (!dsa_slave_dev_check(dev))
+ return err;
+
extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev)) {
@@ -2531,6 +2534,9 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
@@ -2551,6 +2557,9 @@ dsa_slave_lag_changeupper(struct net_device *dev,
int err = NOTIFY_DONE;
struct dsa_port *dp;
+ if (!netif_is_lag_master(dev))
+ return err;
+
netdev_for_each_lower_dev(dev, lower, iter) {
if (!dsa_slave_dev_check(lower))
continue;
@@ -2580,6 +2589,9 @@ dsa_slave_lag_prechangeupper(struct net_device *dev,
int err = NOTIFY_DONE;
struct dsa_port *dp;
+ if (!netif_is_lag_master(dev))
+ return err;
+
netdev_for_each_lower_dev(dev, lower, iter) {
if (!dsa_slave_dev_check(lower))
continue;
@@ -2687,6 +2699,75 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
return NOTIFY_DONE;
}
+static int
+dsa_master_prechangeupper_sanity_check(struct net_device *master,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netlink_ext_ack *extack;
+
+ if (!netdev_uses_dsa(master))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ /* Allow DSA switch uppers */
+ if (dsa_slave_dev_check(info->upper_dev))
+ return NOTIFY_DONE;
+
+ /* Allow bridge uppers of DSA masters, subject to further
+ * restrictions in dsa_bridge_prechangelower_sanity_check()
+ */
+ if (netif_is_bridge_master(info->upper_dev))
+ return NOTIFY_DONE;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ NL_SET_ERR_MSG_MOD(extack,
+ "DSA master cannot join unknown upper interfaces");
+ return notifier_from_errno(-EBUSY);
+}
+
+/* Don't allow bridging of DSA masters, since the bridge layer rx_handler
+ * prevents the DSA fake ethertype handler from being invoked, so we don't get the
+ * chance to strip off and parse the DSA switch tag protocol header (the bridge
+ * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these
+ * frames).
+ * The only case where that would not be an issue is when bridging can already
+ * be offloaded, such as when the DSA master is itself a DSA or plain switchdev
+ * port, and is bridged only with other ports from the same hardware device.
+ */
+static int
+dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *br = info->upper_dev;
+ struct netlink_ext_ack *extack;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(br))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ netdev_for_each_lower_dev(br, lower, iter) {
+ if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower))
+ continue;
+
+ if (!netdev_port_same_parent_id(lower, new_lower)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot do software bridging with a DSA master");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
static int dsa_slave_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2698,25 +2779,40 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
int err;
err = dsa_slave_prechangeupper_sanity_check(dev, info);
- if (err != NOTIFY_DONE)
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_master_prechangeupper_sanity_check(dev, info);
+ if (notifier_to_errno(err))
return err;
- if (dsa_slave_dev_check(dev))
- return dsa_slave_prechangeupper(dev, ptr);
+ err = dsa_bridge_prechangelower_sanity_check(dev, info);
+ if (notifier_to_errno(err))
+ return err;
- if (netif_is_lag_master(dev))
- return dsa_slave_lag_prechangeupper(dev, ptr);
+ err = dsa_slave_prechangeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_slave_lag_prechangeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
break;
}
- case NETDEV_CHANGEUPPER:
- if (dsa_slave_dev_check(dev))
- return dsa_slave_changeupper(dev, ptr);
+ case NETDEV_CHANGEUPPER: {
+ int err;
- if (netif_is_lag_master(dev))
- return dsa_slave_lag_changeupper(dev, ptr);
+ err = dsa_slave_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_slave_lag_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
break;
+ }
case NETDEV_CHANGELOWERSTATE: {
struct netdev_notifier_changelowerstate_info *info = ptr;
struct dsa_port *dp;
@@ -2777,6 +2873,9 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
if (!dsa_port_is_user(dp))
continue;
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
list_add(&dp->slave->close_list, &close_list);
}
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 01a427800797..b5f80bc45ceb 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -2,9 +2,7 @@
/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*
* This module is not a complete tagger implementation. It only provides
- * primitives for taggers that rely on 802.1Q VLAN tags to use. The
- * dsa_8021q_netdev_ops is registered for API compliance and not used
- * directly by callers.
+ * primitives for taggers that rely on 802.1Q VLAN tags to use.
*/
#include <linux/if_vlan.h>
#include <linux/dsa/8021q.h>
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 62b89d6f54fd..e02daa74e833 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -414,12 +414,9 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
off_eth = skb_gro_offset(skb);
hlen = off_eth + sizeof(*eh);
- eh = skb_gro_header_fast(skb, off_eth);
- if (skb_gro_header_hard(skb, hlen)) {
- eh = skb_gro_header_slow(skb, hlen, off_eth);
- if (unlikely(!eh))
- goto out;
- }
+ eh = skb_gro_header(skb, hlen, off_eth);
+ if (unlikely(!eh))
+ goto out;
flush = 0;
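
[editor's note] This hunk, and the matching ones in af_inet.c, fou.c and gre_offload.c below, replace the open-coded fast/slow GRO header lookup with a single skb_gro_header() call. Judging purely from the pattern being deleted, the new helper is presumably equivalent to the following kernel-side sketch (not a standalone program):

    static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
                                       unsigned int offset)
    {
            void *ptr;

            ptr = skb_gro_header_fast(skb, offset);         /* already linear? */
            if (skb_gro_header_hard(skb, hlen))
                    ptr = skb_gro_header_slow(skb, hlen, offset);   /* may be NULL */
            return ptr;
    }
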
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 6a7308de192d..9298eb3251cb 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -714,16 +714,16 @@ ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp)
const struct ethtool_ops *ops = dev->ethtool_ops;
rsp->info.cmd = ETHTOOL_GDRVINFO;
- strlcpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
+ strscpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
if (ops->get_drvinfo) {
ops->get_drvinfo(dev, &rsp->info);
} else if (dev->dev.parent && dev->dev.parent->driver) {
- strlcpy(rsp->info.bus_info, dev_name(dev->dev.parent),
+ strscpy(rsp->info.bus_info, dev_name(dev->dev.parent),
sizeof(rsp->info.bus_info));
- strlcpy(rsp->info.driver, dev->dev.parent->driver->name,
+ strscpy(rsp->info.driver, dev->dev.parent->driver->name,
sizeof(rsp->info.driver));
} else if (dev->rtnl_link_ops) {
- strlcpy(rsp->info.driver, dev->rtnl_link_ops->kind,
+ strscpy(rsp->info.driver, dev->rtnl_link_ops->kind,
sizeof(rsp->info.driver));
} else {
return -EOPNOTSUPP;
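
[editor's note] The strlcpy() to strscpy() conversions in this hunk (and in master.c, slave.c and arp.c above) swap a return value of strlen(src), which forces a full read of the source and makes truncation easy to miss, for the number of bytes actually copied, or -E2BIG on truncation. A rough userspace re-implementation of the strscpy() contract, sketch only, not the kernel's optimized copy:

    #include <stdio.h>
    #include <errno.h>
    #include <sys/types.h>

    static ssize_t my_strscpy(char *dst, const char *src, size_t size)
    {
            size_t i;

            if (size == 0)
                    return -E2BIG;

            for (i = 0; i < size - 1 && src[i]; i++)
                    dst[i] = src[i];
            dst[i] = '\0';

            return src[i] ? -E2BIG : (ssize_t)i;
    }

    int main(void)
    {
            char buf[4];

            printf("%zd\n", my_strscpy(buf, "dsa", sizeof(buf)));      /* 3 */
            printf("%zd\n", my_strscpy(buf, "platform", sizeof(buf))); /* -7 (-E2BIG) */
            return 0;
    }
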
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3ca0cc467886..d3ab1ae32ef5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1219,6 +1219,7 @@ EXPORT_SYMBOL(inet_unregister_protosw);
static int inet_sk_reselect_saddr(struct sock *sk)
{
+ struct inet_bind_hashbucket *prev_addr_hashbucket;
struct inet_sock *inet = inet_sk(sk);
__be32 old_saddr = inet->inet_saddr;
__be32 daddr = inet->inet_daddr;
@@ -1226,6 +1227,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct rtable *rt;
__be32 new_saddr;
struct ip_options_rcu *inet_opt;
+ int err;
inet_opt = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
@@ -1240,20 +1242,34 @@ static int inet_sk_reselect_saddr(struct sock *sk)
if (IS_ERR(rt))
return PTR_ERR(rt);
- sk_setup_caps(sk, &rt->dst);
-
new_saddr = fl4->saddr;
- if (new_saddr == old_saddr)
+ if (new_saddr == old_saddr) {
+ sk_setup_caps(sk, &rt->dst);
return 0;
+ }
+
+ prev_addr_hashbucket =
+ inet_bhashfn_portaddr(sk->sk_prot->h.hashinfo, sk,
+ sock_net(sk), inet->inet_num);
+
+ inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
+
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ inet->inet_saddr = old_saddr;
+ inet->inet_rcv_saddr = old_saddr;
+ ip_rt_put(rt);
+ return err;
+ }
+
+ sk_setup_caps(sk, &rt->dst);
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
- inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
-
/*
* XXX The only one ugly spot where we need to
* XXX really change the sockets identity after
@@ -1448,12 +1464,9 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
+ iph = skb_gro_header(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
proto = iph->protocol;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 87c7e3fc5197..4f7237661afb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1129,7 +1129,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
- strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+ strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
err = 0;
}
neigh_release(neigh);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 025a33c1b04d..cb5bfb77944c 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -323,12 +323,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
off = skb_gro_offset(skb);
len = off + sizeof(*guehdr);
- guehdr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, len)) {
- guehdr = skb_gro_header_slow(skb, len, off);
- if (unlikely(!guehdr))
- goto out;
- }
+ guehdr = skb_gro_header(skb, len, off);
+ if (unlikely(!guehdr))
+ goto out;
switch (guehdr->version) {
case 0:
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 07073fa35205..2b9cb5398335 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -137,12 +137,9 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
off = skb_gro_offset(skb);
hlen = off + sizeof(*greh);
- greh = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- greh = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!greh))
- goto out;
- }
+ greh = skb_gro_header(skb, hlen, off);
+ if (unlikely(!greh))
+ goto out;
/* Only support version 0 and K (key), C (csum) flags. Note that
* although the support for the S (seq#) flag can be added easily
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index eb31c7158b39..f0038043b661 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -130,14 +130,75 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
+static bool inet_use_bhash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+
+ return addr_type != IPV6_ADDR_ANY &&
+ addr_type != IPV6_ADDR_MAPPED;
+ }
+#endif
+ return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
+static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ int bound_dev_if2;
+
+ if (sk == sk2)
+ return false;
+
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+
+ if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2) {
+ if (sk->sk_reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if (!relax || (!reuseport_ok && sk->sk_reuseport &&
+ sk2->sk_reuseport && reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(sk_uid, sock_i_uid(sk2)))))
+ return true;
+ } else if (!reuseport_ok || !sk->sk_reuseport ||
+ !sk2->sk_reuseport || !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(sk_uid, sock_i_uid(sk2)))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool inet_bhash2_conflict(const struct sock *sk,
+ const struct inet_bind2_bucket *tb2,
+ kuid_t sk_uid,
+ bool relax, bool reuseport_cb_ok,
+ bool reuseport_ok)
+{
+ struct sock *sk2;
+
+ sk_for_each_bound_bhash2(sk2, &tb2->owners) {
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ continue;
+
+ if (inet_bind_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
+ return false;
+}
+
+/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb,
+ const struct inet_bind2_bucket *tb2, /* may be null */
bool relax, bool reuseport_ok)
{
- struct sock *sk2;
bool reuseport_cb_ok;
- bool reuse = sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport;
struct sock_reuseport *reuseport_cb;
kuid_t uid = sock_i_uid((struct sock *)sk);
@@ -150,55 +211,87 @@ static int inet_csk_bind_conflict(const struct sock *sk,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners list belong to the same net - the
- * one this bucket belongs to.
+ * in tb->owners and tb2->owners list belong
+ * to the same net - the one this bucket belongs to.
*/
- sk_for_each_bound(sk2, &tb->owners) {
- int bound_dev_if2;
+ if (!inet_use_bhash2_on_bind(sk)) {
+ struct sock *sk2;
- if (sk == sk2)
- continue;
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
- if ((!sk->sk_bound_dev_if ||
- !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2)) {
- if (reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if ((!relax ||
- (!reuseport_ok &&
- reuseport && sk2->sk_reuseport &&
- reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(uid, sock_i_uid(sk2))))) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- break;
- } else if (!reuseport_ok ||
- !reuseport || !sk2->sk_reuseport ||
- !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid, sock_i_uid(sk2)))) {
- if (inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- }
+ sk_for_each_bound(sk2, &tb->owners)
+ if (inet_bind_conflict(sk, sk2, uid, relax,
+ reuseport_cb_ok, reuseport_ok) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ return true;
+
+ return false;
+ }
+
+ /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
+ * ipv4) should have been checked already. We need to do these two
+ * checks separately because their spinlocks have to be acquired/released
+ * independently of each other, to prevent possible deadlocks
+ */
+ return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+ reuseport_ok);
+}
+
+/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
+ * INADDR_ANY (if ipv4) socket.
+ *
+ * Caller must hold bhash hashbucket lock with local bh disabled, to protect
+ * against concurrent binds on the port for addr any
+ */
+static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev,
+ bool relax, bool reuseport_ok)
+{
+ kuid_t uid = sock_i_uid((struct sock *)sk);
+ const struct net *net = sock_net(sk);
+ struct sock_reuseport *reuseport_cb;
+ struct inet_bind_hashbucket *head2;
+ struct inet_bind2_bucket *tb2;
+ bool reuseport_cb_ok;
+
+ rcu_read_lock();
+ reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
+ /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
+ reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
+ rcu_read_unlock();
+
+ head2 = inet_bhash2_addr_any_hashbucket(sk, net, port);
+
+ spin_lock(&head2->lock);
+
+ inet_bind_bucket_for_each(tb2, &head2->chain)
+ if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ break;
+
+ if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+ reuseport_ok)) {
+ spin_unlock(&head2->lock);
+ return true;
}
- return sk2 != NULL;
+
+ spin_unlock(&head2->lock);
+ return false;
}
/*
* Find an open port number for the socket. Returns with the
- * inet_bind_hashbucket lock held.
+ * inet_bind_hashbucket locks held if successful.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
+inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
+ struct inet_bind2_bucket **tb2_ret,
+ struct inet_bind_hashbucket **head2_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
int port = 0;
- struct inet_bind_hashbucket *head;
+ struct inet_bind_hashbucket *head, *head2;
struct net *net = sock_net(sk);
bool relax = false;
int i, low, high, attempt_half;
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
int l3mdev;
@@ -239,11 +332,20 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
+ if (inet_use_bhash2_on_bind(sk)) {
+ if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
+ goto next_port;
+ }
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ spin_lock(&head2->lock);
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, relax, false))
+ if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (!inet_csk_bind_conflict(sk, tb, tb2,
+ relax, false))
goto success;
+ spin_unlock(&head2->lock);
goto next_port;
}
tb = NULL;
@@ -272,6 +374,8 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
+ *tb2_ret = tb2;
+ *head2_ret = head2;
return head;
}
@@ -368,53 +472,95 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
int ret = 1, port = snum;
- struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool found_port = false, check_bind_conflict = true;
+ bool bhash_created = false, bhash2_created = false;
+ struct inet_bind_hashbucket *head, *head2;
+ struct inet_bind2_bucket *tb2 = NULL;
struct inet_bind_bucket *tb = NULL;
+ bool head2_lock_acquired = false;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &port);
+ head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
if (!head)
return ret;
+
+ head2_lock_acquired = true;
+
+ if (tb && tb2)
+ goto success;
+ found_port = true;
+ } else {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (inet_bind_bucket_match(tb, net, port, l3mdev))
+ break;
+ }
+
+ if (!tb) {
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
+ head, port, l3mdev);
if (!tb)
- goto tb_not_found;
- goto success;
+ goto fail_unlock;
+ bhash_created = true;
}
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port)
- goto tb_found;
-tb_not_found:
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port, l3mdev);
- if (!tb)
- goto fail_unlock;
-tb_found:
- if (!hlist_empty(&tb->owners)) {
- if (sk->sk_reuse == SK_FORCE_REUSE)
- goto success;
- if ((tb->fastreuse > 0 && reuse) ||
- sk_reuseport_match(tb, sk))
- goto success;
- if (inet_csk_bind_conflict(sk, tb, true, true))
+ if (!found_port) {
+ if (!hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse == SK_FORCE_REUSE ||
+ (tb->fastreuse > 0 && reuse) ||
+ sk_reuseport_match(tb, sk))
+ check_bind_conflict = false;
+ }
+
+ if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
+ if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
+ goto fail_unlock;
+ }
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ spin_lock(&head2->lock);
+ head2_lock_acquired = true;
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
+ }
+
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
+ net, head2, port, l3mdev, sk);
+ if (!tb2)
goto fail_unlock;
+ bhash2_created = true;
}
+
+ if (!found_port && check_bind_conflict) {
+ if (inet_csk_bind_conflict(sk, tb, tb2, true, true))
+ goto fail_unlock;
+ }
+
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, tb2, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
+ WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
+ if (ret) {
+ if (bhash_created)
+ inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
+ if (bhash2_created)
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
+ tb2);
+ }
+ if (head2_lock_acquired)
+ spin_unlock(&head2->lock);
spin_unlock_bh(&head->lock);
return ret;
}
@@ -962,6 +1108,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
+ newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
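The conflict helpers above implement the bind semantics visible from user space: a wildcard bind owns the port for every local address unless address reuse is negotiated (SO_REUSEADDR on both sockets, or SO_REUSEPORT with a matching uid). A user-space illustration of that rule, not part of this patch:

/* Illustrative user-space check: binding a specific address to a port that
 * is already bound to the wildcard address fails with EADDRINUSE unless
 * address reuse is enabled on both sockets.
 */
#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in any = { .sin_family = AF_INET,
				   .sin_port = htons(50000),
				   .sin_addr.s_addr = htonl(INADDR_ANY) };
	struct sockaddr_in one = any;
	int a = socket(AF_INET, SOCK_STREAM, 0);
	int b = socket(AF_INET, SOCK_STREAM, 0);

	inet_pton(AF_INET, "127.0.0.1", &one.sin_addr);

	if (bind(a, (struct sockaddr *)&any, sizeof(any)))
		perror("bind 0.0.0.0:50000");
	/* Expected to fail with EADDRINUSE: the wildcard bind above already
	 * owns the port for every local address.
	 */
	if (bind(b, (struct sockaddr *)&one, sizeof(one)))
		perror("bind 127.0.0.1:50000");

	close(a);
	close(b);
	return 0;
}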
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index b9d995b5ce24..60d77e234a68 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -92,12 +92,75 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
+bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net,
+ unsigned short port, int l3mdev)
+{
+ return net_eq(ib_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev;
+}
+
+static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
+ struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port, int l3mdev,
+ const struct sock *sk)
+{
+ write_pnet(&tb->ib_net, net);
+ tb->l3mdev = l3mdev;
+ tb->port = port;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ else
+#endif
+ tb->rcv_saddr = sk->sk_rcv_saddr;
+ INIT_HLIST_HEAD(&tb->owners);
+ hlist_add_head(&tb->node, &head->chain);
+}
+
+struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
+ struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port,
+ int l3mdev,
+ const struct sock *sk)
+{
+ struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+
+ if (tb)
+ inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk);
+
+ return tb;
+}
+
+/* Caller must hold hashbucket lock for this tb with local BH disabled */
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
+{
+ if (hlist_empty(&tb->owners)) {
+ __hlist_del(&tb->node);
+ kmem_cache_free(cachep, tb);
+ }
+}
+
+static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
+ const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return ipv6_addr_equal(&tb2->v6_rcv_saddr,
+ &sk->sk_v6_rcv_saddr);
+#endif
+ return tb2->rcv_saddr == sk->sk_rcv_saddr;
+}
+
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum)
+ struct inet_bind2_bucket *tb2, unsigned short port)
{
- inet_sk(sk)->inet_num = snum;
+ inet_sk(sk)->inet_num = port;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
+ sk_add_bind2_node(sk, &tb2->owners);
+ inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -109,6 +172,9 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+ struct inet_bind_hashbucket *head2 =
+ inet_bhashfn_portaddr(hashinfo, sk, sock_net(sk),
+ inet_sk(sk)->inet_num);
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -117,6 +183,17 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ spin_lock(&head2->lock);
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash;
+
+ __sk_del_bind2_node(sk);
+ inet_csk(sk)->icsk_bind2_hash = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+ }
+ spin_unlock(&head2->lock);
+
spin_unlock(&head->lock);
}
@@ -135,12 +212,21 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
const int bhash = inet_bhashfn(sock_net(sk), port,
table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
+ struct inet_bind_hashbucket *head2 =
+ inet_bhashfn_portaddr(table, child, sock_net(sk), port);
+ bool created_inet_bind_bucket = false;
+ bool update_fastreuse = false;
+ struct net *net = sock_net(sk);
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
+ spin_lock(&head2->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- if (unlikely(!tb)) {
+ tb2 = inet_csk(sk)->icsk_bind2_hash;
+ if (unlikely(!tb || !tb2)) {
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -153,25 +239,49 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), sock_net(sk)) &&
- tb->l3mdev == l3mdev && tb->port == port)
+ if (inet_bind_bucket_match(tb, net, port, l3mdev))
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- sock_net(sk), head, port,
- l3mdev);
+ net, head, port, l3mdev);
if (!tb) {
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
return -ENOMEM;
}
+ created_inet_bind_bucket = true;
+ }
+ update_fastreuse = true;
+
+ goto bhash2_find;
+ } else if (!inet_bind2_bucket_addr_match(tb2, child)) {
+ l3mdev = inet_sk_bound_l3mdev(sk);
+
+bhash2_find:
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child);
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
+ net, head2, port,
+ l3mdev, child);
+ if (!tb2)
+ goto error;
}
- inet_csk_update_fastreuse(tb, child);
}
- inet_bind_hash(child, tb, port);
+ if (update_fastreuse)
+ inet_csk_update_fastreuse(tb, child);
+ inet_bind_hash(child, tb, tb2, port);
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
return 0;
+
+error:
+ if (created_inet_bind_bucket)
+ inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
+ spin_unlock(&head2->lock);
+ spin_unlock(&head->lock);
+ return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -675,6 +785,112 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
+static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev, const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev &&
+ ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+ else
+#endif
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+}
+
+bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
+ unsigned short port, int l3mdev, const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr addr_any = {};
+
+ if (sk->sk_family == AF_INET6)
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev &&
+ ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any);
+ else
+#endif
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+}
+
+/* The socket's bhash2 hashbucket spinlock must be held when this is called */
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net *net,
+ unsigned short port, int l3mdev, const struct sock *sk)
+{
+ struct inet_bind2_bucket *bhash2 = NULL;
+
+ inet_bind_bucket_for_each(bhash2, &head->chain)
+ if (inet_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
+ break;
+
+ return bhash2;
+}
+
+struct inet_bind_hashbucket *
+inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port)
+{
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+ u32 hash;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr addr_any = {};
+
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(net, &addr_any, port);
+ else
+#endif
+ hash = ipv4_portaddr_hash(net, 0, port);
+
+ return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
+}
+
+int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk)
+{
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+ struct inet_bind2_bucket *tb2, *new_tb2;
+ int l3mdev = inet_sk_bound_l3mdev(sk);
+ struct inet_bind_hashbucket *head2;
+ int port = inet_sk(sk)->inet_num;
+ struct net *net = sock_net(sk);
+
+ /* Allocate a bind2 bucket ahead of time to avoid permanently putting
+ * the bhash2 table in an inconsistent state if a new tb2 bucket
+ * allocation fails.
+ */
+ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
+ if (!new_tb2)
+ return -ENOMEM;
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+
+ if (prev_saddr) {
+ spin_lock_bh(&prev_saddr->lock);
+ __sk_del_bind2_node(sk);
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
+ inet_csk(sk)->icsk_bind2_hash);
+ spin_unlock_bh(&prev_saddr->lock);
+ }
+
+ spin_lock_bh(&head2->lock);
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
+ if (!tb2) {
+ tb2 = new_tb2;
+ inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk);
+ }
+ sk_add_bind2_node(sk, &tb2->owners);
+ inet_csk(sk)->icsk_bind2_hash = tb2;
+ spin_unlock_bh(&head2->lock);
+
+ if (tb2 != new_tb2)
+ kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
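The comment in inet_bhash2_update_saddr() above describes a general pattern: allocate the replacement entry before tearing anything down, so a failed allocation leaves the tables untouched. A stand-alone sketch of the same idea with toy list types (hypothetical helpers, not the kernel API):

#include <errno.h>
#include <stdlib.h>

/* Toy illustration of the preallocate-then-commit pattern used by
 * inet_bhash2_update_saddr(): the allocation happens before any state is
 * torn down, so -ENOMEM leaves the old bucket intact.
 */
struct node { int key; struct node *next; };

static struct node *find(struct node *head, int key)
{
	for (; head; head = head->next)
		if (head->key == key)
			return head;
	return NULL;
}

static void unlink_key(struct node **head, int key)
{
	while (*head) {
		if ((*head)->key == key) {
			struct node *victim = *head;

			*head = victim->next;
			free(victim);
			return;
		}
		head = &(*head)->next;
	}
}

int move_entry(struct node **old_bucket, struct node **new_bucket, int key)
{
	struct node *spare = malloc(sizeof(*spare)); /* before touching state */

	if (!spare)
		return -ENOMEM;          /* old_bucket is still intact */

	unlink_key(old_bucket, key);
	if (!find(*new_bucket, key)) {
		spare->key = key;
		spare->next = *new_bucket;
		*new_bucket = spare;
	} else {
		free(spare);             /* bucket already has the entry */
	}
	return 0;
}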
+
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -694,11 +910,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *, __u16, struct inet_timewait_sock **))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
+ struct inet_bind_hashbucket *head, *head2;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
+ bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -755,8 +973,7 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port) {
+ if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -774,6 +991,7 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
+ tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -789,6 +1007,20 @@ next_port:
return -EADDRNOTAVAIL;
ok:
+ /* Find the corresponding tb2 bucket since we need to
+ * add the socket to the bhash2 table as well
+ */
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ spin_lock(&head2->lock);
+
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
+ head2, port, l3mdev, sk);
+ if (!tb2)
+ goto error;
+ }
+
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -798,7 +1030,10 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, tb2, port);
+
+ spin_unlock(&head2->lock);
+
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -810,6 +1045,13 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
+
+error:
+ spin_unlock(&head2->lock);
+ if (tb_created)
+ inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
+ spin_unlock_bh(&head->lock);
+ return -ENOMEM;
}
/*
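For context, the __inet_hash_connect() hunks above sit inside the port-search loop described by the RFC 6056 comment earlier in this file. A toy rendering of that double-hash selection, illustrative only and not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

/* Toy rendering of RFC 6056 section 3.3.4, "Double-Hash Port Selection":
 * one hash of the connection tuple picks a starting offset, a second hash
 * picks a slot in a perturbation table, and the sum (mod the ephemeral
 * range) gives the first candidate port; the table slot is bumped once a
 * port is found so the next connection to the same destination starts
 * elsewhere.
 */
#define TABLE_SLOTS 256

static uint32_t perturb[TABLE_SLOTS];

int main(void)
{
	const uint32_t low = 32768, high = 60999;
	const uint32_t remaining = high - low + 1;
	uint32_t offset = 0x9e3779b9;               /* stand-in for hash(tuple, key1) */
	uint32_t index  = 0x7f4a7c15 % TABLE_SLOTS; /* stand-in for hash(tuple, key2) */

	for (uint32_t i = 0; i < 5; i++) {
		uint32_t port = low + (offset + perturb[index] + i) % remaining;

		printf("candidate %u: %u\n", i, port);
		/* a real implementation stops at the first usable port and
		 * then bumps perturb[index]
		 */
	}
	return 0;
}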
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e5011c136fdb..306b94dedc8d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4433,12 +4433,16 @@ static void __tcp_alloc_md5sig_pool(void)
* to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool_populated = true;
+ /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+ * and tcp_get_md5sig_pool().
+ */
+ WRITE_ONCE(tcp_md5sig_pool_populated, true);
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool_populated)) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
mutex_lock(&tcp_md5sig_mutex);
if (!tcp_md5sig_pool_populated) {
@@ -4449,7 +4453,8 @@ bool tcp_alloc_md5sig_pool(void)
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool_populated;
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ return READ_ONCE(tcp_md5sig_pool_populated);
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -4465,7 +4470,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
local_bh_disable();
- if (tcp_md5sig_pool_populated) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (READ_ONCE(tcp_md5sig_pool_populated)) {
/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
smp_rmb();
return this_cpu_ptr(&tcp_md5sig_pool);
@@ -4736,6 +4742,12 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
+ tcp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("tcp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4759,7 +4771,7 @@ void __init tcp_init(void)
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
alloc_large_system_hash("TCP bind",
- sizeof(struct inet_bind_hashbucket),
+ 2 * sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4768,9 +4780,12 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
+ tcp_hashinfo.bhash2 = tcp_hashinfo.bhash + tcp_hashinfo.bhash_size;
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
+ spin_lock_init(&tcp_hashinfo.bhash2[i].lock);
+ INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
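The md5sig-pool hunks annotate a classic publish-once pattern: the flag is written exactly once after the pool is fully initialised and is read locklessly afterwards. A user-space sketch of the same ordering with C11 atomics, roughly what READ_ONCE()/WRITE_ONCE() plus smp_wmb()/smp_rmb() express here (not kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Sketch of the lazy-publish pattern: the writer initialises the pool under
 * a mutex and then publishes "populated" with a release store; readers do an
 * acquire load before touching the pool, so the initialisation is guaranteed
 * to be visible to them.
 */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool pool_populated;
static int pool_data;                   /* stand-in for the per-CPU pool */

static int *pool_get(void)
{
	if (atomic_load_explicit(&pool_populated, memory_order_acquire))
		return &pool_data;      /* initialisation visible here */
	return NULL;
}

static void pool_alloc(void)
{
	pthread_mutex_lock(&pool_lock);
	if (!atomic_load_explicit(&pool_populated, memory_order_relaxed)) {
		pool_data = 42;         /* expensive initialisation */
		atomic_store_explicit(&pool_populated, true,
				      memory_order_release);
	}
	pthread_mutex_unlock(&pool_lock);
}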
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 825b216d11f5..45cc7f1ca296 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -272,8 +272,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
* The request socket is not added to the ehash
* because it's been added to the accept queue directly.
*/
+ req->timeout = tcp_timeout_init(child);
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+ req->timeout, TCP_RTO_MAX);
refcount_set(&req->rsk_refcnt, 2);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c83780dc9bf..cc2ad67f75be 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -199,11 +199,12 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+ __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__be16 orig_sport, orig_dport;
- __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
@@ -246,10 +247,28 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!inet_opt || !inet_opt->opt.srr)
daddr = fl4->daddr;
- if (!inet->inet_saddr)
+ if (!inet->inet_saddr) {
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
+ sk, sock_net(sk),
+ inet->inet_num);
+ prev_sk_rcv_saddr = sk->sk_rcv_saddr;
+ }
inet->inet_saddr = fl4->saddr;
+ }
+
sk_rcv_saddr_set(sk, inet->inet_saddr);
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ inet->inet_saddr = 0;
+ sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
+ ip_rt_put(rt);
+ return err;
+ }
+ }
+
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 30abde86db45..a844a0d38482 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -195,12 +195,9 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
- th = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- th = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!th))
- goto out;
- }
+ th = skb_gro_header(skb, hlen, off);
+ if (unlikely(!th))
+ goto out;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
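Both this hunk and the matching ipv6_gro_receive() hunk further below replace the open-coded fast/slow GRO header lookup with a single skb_gro_header() call. That helper is defined outside this diff; judging only from the code it replaces, its shape is presumably along these lines (a sketch, not the actual definition):

/* Presumed shape of skb_gro_header(), inferred from the open-coded sequence
 * removed above: try the linear fast path first, fall back to the slow path
 * when the header is not yet pulled, and return NULL on failure.
 */
static inline void *skb_gro_header_sketch(struct sk_buff *skb,
					  unsigned int hlen,
					  unsigned int offset)
{
	void *ptr;

	ptr = skb_gro_header_fast(skb, offset);
	if (skb_gro_header_hard(skb, hlen))
		ptr = skb_gro_header_slow(skb, hlen, offset);
	return ptr;
}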
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b4dfb82d6ecb..cb79127f45c3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -428,7 +428,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
if (!tp->retrans_stamp)
tp->retrans_stamp = tcp_time_stamp(tp);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
+ req->timeout << req->num_timeout, TCP_RTO_MAX);
}
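Together with the tcp_fastopen change above, the SYNACK retransmit timeout now starts from req->timeout (which tcp_timeout_init() may derive from a BPF sock_ops program instead of the fixed TCP_TIMEOUT_INIT) and doubles on each retry. A small stand-alone calculation of that backoff, assuming a 1 s base and the 120 s TCP_RTO_MAX clamp:

#include <stdio.h>

int main(void)
{
	unsigned long base_ms = 1000;          /* assumed 1 s initial timeout */
	unsigned long rto_max_ms = 120000;     /* TCP_RTO_MAX is 120 s */

	for (int num_timeout = 0; num_timeout < 8; num_timeout++) {
		unsigned long t = base_ms << num_timeout;

		if (t > rto_max_ms)
			t = rto_max_ms;        /* the timer code clamps at TCP_RTO_MAX */
		printf("retry %d: %lu ms\n", num_timeout, t);
	}
	return 0;
}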
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 80cb50d459e4..48b4ff0294f6 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -360,7 +360,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
return NULL;
- strlcpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name, IFNAMSIZ);
} else {
strcpy(name, "ip6gre%d");
}
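This is one of many strlcpy() -> strscpy() conversions in this series; the behavioural difference is in the return value and in how much of the source string is read. A user-space sketch of the distinction (toy_strscpy() mimics the kernel helper, it is not the kernel implementation):

#include <errno.h>
#include <stdio.h>

/* strlcpy() returns strlen(src), so it reads the whole source even when the
 * destination is smaller and the caller must compare against the buffer size
 * to detect truncation.  strscpy() reads no more than it can store and
 * returns the number of bytes copied, or -E2BIG on truncation.
 */
static long toy_strscpy(char *dst, const char *src, unsigned long size)
{
	unsigned long i;

	if (!size)
		return -E2BIG;
	for (i = 0; i < size - 1 && src[i]; i++)
		dst[i] = src[i];
	dst[i] = '\0';
	return src[i] ? -E2BIG : (long)i;
}

int main(void)
{
	char name[8];
	long ret = toy_strscpy(name, "a-very-long-interface-name", sizeof(name));

	if (ret < 0)
		printf("truncated: \"%s\"\n", name);   /* prints "a-very-" */
	else
		printf("copied %ld bytes: \"%s\"\n", ret, name);
	return 0;
}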
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d12dba2dd535..d37a8c97e6de 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -219,12 +219,9 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
+ iph = skb_gro_header(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
skb_set_network_header(skb, off);
skb_gro_pull(skb, sizeof(*iph));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 79c6a827dea9..9e97f3b4c7e8 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -293,7 +293,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
if (p->name[0]) {
if (!dev_valid_name(p->name))
goto failed;
- strlcpy(name, p->name, IFNAMSIZ);
+ strscpy(name, p->name, IFNAMSIZ);
} else {
sprintf(name, "ip6tnl%%d");
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8fe59a79e800..1ddd60d06830 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -211,7 +211,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
if (p->name[0]) {
if (!dev_valid_name(p->name))
goto failed;
- strlcpy(name, p->name, IFNAMSIZ);
+ strscpy(name, p->name, IFNAMSIZ);
} else {
sprintf(name, "ip6_vti%%d");
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6b73b7a5f175..98f1cf40746f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -254,7 +254,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
goto failed;
- strlcpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name, IFNAMSIZ);
} else {
strcpy(name, "sit%d");
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e54eee80ce5f..ff5c4fc135fc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -287,8 +287,25 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (!saddr) {
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ struct in6_addr prev_v6_rcv_saddr;
+
+ if (icsk->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
+ sk, sock_net(sk),
+ inet->inet_num);
+ prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ }
saddr = &fl6.saddr;
sk->sk_v6_rcv_saddr = *saddr;
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
+ goto failure;
+ }
+ }
}
/* set the source address */
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 6cd97c75445c..f2ae03c40473 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -254,7 +254,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
int rc;
if (cfg->ifname) {
- strlcpy(name, cfg->ifname, IFNAMSIZ);
+ strscpy(name, cfg->ifname, IFNAMSIZ);
name_assign_type = NET_NAME_USER;
} else {
strcpy(name, L2TP_ETH_DEV_NAME);
@@ -314,7 +314,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
return rc;
}
- strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ strscpy(session->ifname, dev->name, IFNAMSIZ);
rcu_assign_pointer(spriv->dev, dev);
rtnl_unlock();
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index dcf752b55a52..5a6705a0e4ec 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -300,12 +300,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
return NULL;
return net->nf.hooks_ipv6 + hooknum;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
- return NULL;
- return net->nf.hooks_decnet + hooknum;
-#endif
default:
WARN_ON_ONCE(1);
return NULL;
@@ -750,10 +744,6 @@ static int __net_init netfilter_net_init(struct net *net)
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
__netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
-#endif
-
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 71c2f4f95d36..da65c6e8eeeb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2807,10 +2807,6 @@ err_expect:
return ret;
}
-#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
- (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \
- IS_ENABLED(CONFIG_NF_CT_NETLINK))
-
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout)
@@ -2866,5 +2862,3 @@ int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status)
return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
-
-#endif
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 71e29adac48b..8120aadf6a0f 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -215,13 +215,6 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
#endif
break;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- if (hook >= ARRAY_SIZE(net->nf.hooks_decnet))
- return ERR_PTR(-EINVAL);
- hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
- break;
-#endif
#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
case NFPROTO_NETDEV:
if (hook >= NF_NETDEV_NUMHOOKS)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7e8a39a35627..45f9a7b3410e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -252,10 +252,17 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
- if (unlikely(error))
- kfree_skb(skb);
- else
+ switch (error) {
+ case 0:
+ case -EAGAIN:
+ case -ERESTARTSYS:
+ case -EINTR:
consume_skb(skb);
+ break;
+ default:
+ kfree_skb(skb);
+ break;
+ }
stats_counter = &stats->n_missed;
goto out;
}
@@ -551,8 +558,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
out:
if (err)
skb_tx_error(skb);
- kfree_skb(user_skb);
- kfree_skb(nskb);
+ consume_skb(user_skb);
+ consume_skb(nskb);
+
return err;
}
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 5b2ee9c1c00b..134bc8503461 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -65,7 +65,7 @@ static int internal_dev_stop(struct net_device *netdev)
static void internal_dev_getinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, "openvswitch", sizeof(info->driver));
+ strscpy(info->driver, "openvswitch", sizeof(info->driver));
}
static const struct ethtool_ops internal_dev_ethtool_ops = {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5cbe07116e04..7de46d831349 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1905,7 +1905,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
*/
spkt->spkt_family = dev->type;
- strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
+ strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
spkt->spkt_protocol = skb->protocol;
/*
@@ -3565,7 +3565,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
- strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
+ strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
return sizeof(*uaddr);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 790d6809be81..1ebab4b11262 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1977,9 +1977,6 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool rtnl_held = false;
u32 flags;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
tp_created = 0;
@@ -2208,9 +2205,6 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
int err;
bool rtnl_held = false;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2826,10 +2820,6 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
unsigned long cl;
int err;
- if (n->nlmsg_type != RTM_GETCHAIN &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
q = NULL;
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bf87b50837a8..db1569fac57c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -171,7 +171,7 @@ out_einval:
}
EXPORT_SYMBOL(register_qdisc);
-int unregister_qdisc(struct Qdisc_ops *qops)
+void unregister_qdisc(struct Qdisc_ops *qops)
{
struct Qdisc_ops *q, **qp;
int err = -ENOENT;
@@ -186,7 +186,8 @@ int unregister_qdisc(struct Qdisc_ops *qops)
err = 0;
}
write_unlock(&qdisc_mod_lock);
- return err;
+
+ WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);
@@ -194,7 +195,7 @@ EXPORT_SYMBOL(unregister_qdisc);
void qdisc_get_default(char *name, size_t len)
{
read_lock(&qdisc_mod_lock);
- strlcpy(name, default_qdisc_ops->id, len);
+ strscpy(name, default_qdisc_ops->id, len);
read_unlock(&qdisc_mod_lock);
}
@@ -1163,7 +1164,7 @@ static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
static struct Qdisc *qdisc_create(struct net_device *dev,
struct netdev_queue *dev_queue,
- struct Qdisc *p, u32 parent, u32 handle,
+ u32 parent, u32 handle,
struct nlattr **tca, int *errp,
struct netlink_ext_ack *extack)
{
@@ -1424,10 +1425,6 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -1508,9 +1505,6 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q, *p;
int err;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
/* Reinit, just in case something touches this. */
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
@@ -1640,7 +1634,7 @@ create_n_graft:
}
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
- q = qdisc_create(dev, dev_ingress_queue(dev), p,
+ q = qdisc_create(dev, dev_ingress_queue(dev),
tcm->tcm_parent, tcm->tcm_parent,
tca, &err, extack);
} else {
@@ -1657,7 +1651,7 @@ create_n_graft:
else
dev_queue = netdev_get_tx_queue(dev, 0);
- q = qdisc_create(dev, dev_queue, p,
+ q = qdisc_create(dev, dev_queue,
tcm->tcm_parent, tcm->tcm_handle,
tca, &err, extack);
}
@@ -1992,10 +1986,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 4c8e994cf0a5..816fd0d7ba38 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -577,7 +577,6 @@ static void atm_tc_reset(struct Qdisc *sch)
pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list)
qdisc_reset(flow->q);
- sch->q.qlen = 0;
}
static void atm_tc_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 91a0dc463c48..ba99ce05cd52 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -975,7 +975,6 @@ cbq_reset(struct Qdisc *sch)
cl->cpriority = cl->priority;
}
}
- sch->q.qlen = 0;
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2adbd945bf15..25d2daaa8122 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -315,8 +315,6 @@ static void choke_reset(struct Qdisc *sch)
rtnl_qdisc_drop(skb, sch);
}
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
if (q->tab)
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 18e4f7a0b291..4e5b1cf11b85 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -441,8 +441,6 @@ static void drr_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void drr_destroy_qdisc(struct Qdisc *sch)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 4c100d105269..7da6dc38a382 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -409,8 +409,6 @@ static void dsmark_reset(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
if (p->q)
qdisc_reset(p->q);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void dsmark_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index c48f91075b5c..d96103b0e2bf 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -445,9 +445,6 @@ static void etf_reset(struct Qdisc *sch)
timesortedlist_clear(sch);
__qdisc_reset_queue(&sch->q);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
-
q->last = 0;
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index d73393493553..8de4365886e8 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -727,8 +727,6 @@ static void ets_qdisc_reset(struct Qdisc *sch)
}
for (band = 0; band < q->nbands; band++)
qdisc_reset(q->classes[band].qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void ets_qdisc_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 839e1235db05..23a042adb74d 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch)
codel_vars_init(&flow->cvars);
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
q->memory_usage = 0;
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index d6aba6edd16e..35c35465226b 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -521,9 +521,6 @@ static void fq_pie_reset(struct Qdisc *sch)
INIT_LIST_HEAD(&flow->flowchain);
pie_vars_init(&flow->vars);
}
-
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
}
static void fq_pie_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d3979a6000e7..03efc40e42fc 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1484,8 +1484,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
}
q->eligible = RB_ROOT;
qdisc_watchdog_cancel(&q->watchdog);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 23a9d6242429..cb5872d22ecf 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1008,8 +1008,6 @@ static void htb_reset(struct Qdisc *sch)
}
qdisc_watchdog_cancel(&q->watchdog);
__qdisc_reset_queue(&q->direct_queue);
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index cd8ab90c4765..f28050c7f12d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch)
for (band = 0; band < q->bands; band++)
qdisc_reset(q->queues[band]);
- sch->q.qlen = 0;
q->curband = 0;
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3b8d7197c06b..c03a11dd990f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch)
for (prio = 0; prio < q->bands; prio++)
qdisc_reset(q->queues[prio]);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d4ce58c90f9f..13246a9dc5c1 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1458,8 +1458,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void qfq_destroy_qdisc(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 40adf1f07a82..f1e013e3f04a 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -176,8 +176,6 @@ static void red_reset(struct Qdisc *sch)
struct red_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 3d061a13d7ed..31717fa45a4f 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -453,8 +453,6 @@ static void sfb_reset(struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
q->slot = 0;
q->double_buffering = false;
sfb_zero_all_buckets(q);
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 7a5e4c454715..df72fb83d9c7 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch)
struct skbprio_sched_data *q = qdisc_priv(sch);
int prio;
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
-
for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
__skb_queue_purge(&q->qdiscs[prio]);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 0b941dd63d26..db88a692ef81 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1636,8 +1636,6 @@ static void taprio_reset(struct Qdisc *sch)
if (q->qdiscs[i])
qdisc_reset(q->qdiscs[i]);
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void taprio_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 72102277449e..d0288e223542 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -330,8 +330,6 @@ static void tbf_reset(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
q->t_c = ktime_get_ns();
q->tokens = q->buffer;
q->ptokens = q->mtu;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6af6b95bdb67..16f9238aa51d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
skb_queue_purge(&dat->q);
- sch->q.qlen = 0;
}
static void
@@ -492,7 +491,7 @@ static int __init teql_init(void)
master = netdev_priv(dev);
- strlcpy(master->qops.id, dev->name, IFNAMSIZ);
+ strscpy(master->qops.id, dev->name, IFNAMSIZ);
err = register_qdisc(&master->qops);
if (err) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index bf338b782fc4..dea2972c8178 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -785,15 +785,45 @@ static int unix_set_peek_off(struct sock *sk, int val)
}
#ifdef CONFIG_PROC_FS
+static int unix_count_nr_fds(struct sock *sk)
+{
+ struct sk_buff *skb;
+ struct unix_sock *u;
+ int nr_fds = 0;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
+ while (skb) {
+ u = unix_sk(skb->sk);
+ nr_fds += atomic_read(&u->scm_stat.nr_fds);
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ }
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ return nr_fds;
+}
+
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
+ int nr_fds;
if (sk) {
- u = unix_sk(sock->sk);
- seq_printf(m, "scm_fds: %u\n",
- atomic_read(&u->scm_stat.nr_fds));
+ u = unix_sk(sk);
+ if (sock->type == SOCK_DGRAM) {
+ nr_fds = atomic_read(&u->scm_stat.nr_fds);
+ goto out_print;
+ }
+
+ unix_state_lock(sk);
+ if (sk->sk_state != TCP_LISTEN)
+ nr_fds = atomic_read(&u->scm_stat.nr_fds);
+ else
+ nr_fds = unix_count_nr_fds(sk);
+ unix_state_unlock(sk);
+out_print:
+ seq_printf(m, "scm_fds: %u\n", nr_fds);
}
}
#else
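The new unix_count_nr_fds() makes the scm_fds value of a listening socket include SCM_RIGHTS descriptors queued on not-yet-accepted connections. The value is exposed through procfs; a minimal reader, with the pid and fd number below as placeholders:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/1234/fdinfo/5", "r"); /* hypothetical pid/fd of a unix socket */

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "scm_fds:", 8))
			fputs(line, stdout);         /* e.g. "scm_fds: 3" */
	fclose(f);
	return 0;
}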
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b4ee163154a6..ee418701cdee 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -882,6 +882,16 @@ s64 vsock_stream_has_space(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
+void vsock_data_ready(struct sock *sk)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat ||
+ sock_flag(sk, SOCK_DONE))
+ sk->sk_data_ready(sk);
+}
+EXPORT_SYMBOL_GPL(vsock_data_ready);
+
static int vsock_release(struct socket *sock)
{
__vsock_release(sock->sk, 0);
@@ -1066,8 +1076,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
if (transport && transport->stream_is_active(vsk) &&
!(sk->sk_shutdown & RCV_SHUTDOWN)) {
bool data_ready_now = false;
+ int target = sock_rcvlowat(sk, 0, INT_MAX);
int ret = transport->notify_poll_in(
- vsk, 1, &data_ready_now);
+ vsk, target, &data_ready_now);
if (ret < 0) {
mask |= EPOLLERR;
} else {
@@ -2137,6 +2148,25 @@ out:
return err;
}
+static int vsock_set_rcvlowat(struct sock *sk, int val)
+{
+ const struct vsock_transport *transport;
+ struct vsock_sock *vsk;
+
+ vsk = vsock_sk(sk);
+
+ if (val > vsk->buffer_size)
+ return -EINVAL;
+
+ transport = vsk->transport;
+
+ if (transport && transport->set_rcvlowat)
+ return transport->set_rcvlowat(vsk, val);
+
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+ return 0;
+}
+
static const struct proto_ops vsock_stream_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -2156,6 +2186,7 @@ static const struct proto_ops vsock_stream_ops = {
.recvmsg = vsock_connectible_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+ .set_rcvlowat = vsock_set_rcvlowat,
};
static const struct proto_ops vsock_seqpacket_ops = {
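With vsock_set_rcvlowat() and the target-aware poll changes above, SO_RCVLOWAT becomes meaningful for AF_VSOCK stream sockets. A user-space usage sketch, with connection setup omitted:

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

/* SO_RCVLOWAT on an AF_VSOCK stream socket now delays poll()/read() wake-ups
 * until at least 'lowat' bytes are queued.  The value must not exceed the
 * vsock buffer size, and the Hyper-V transport rejects it with EOPNOTSUPP.
 */
int main(void)
{
	int lowat = 4096;
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat)))
		perror("setsockopt(SO_RCVLOWAT)");

	/* connect(fd, ...) and read() as usual; the reader is now only woken
	 * once at least 4 KiB of stream data is available, or the socket is
	 * marked done (e.g. the peer shut the connection down).
	 */
	close(fd);
	return 0;
}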
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index fd98229e3db3..59c3e2697069 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -815,6 +815,12 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
return 0;
}
+static
+int hvs_set_rcvlowat(struct vsock_sock *vsk, int val)
+{
+ return -EOPNOTSUPP;
+}
+
static struct vsock_transport hvs_transport = {
.module = THIS_MODULE,
@@ -850,6 +856,7 @@ static struct vsock_transport hvs_transport = {
.notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
+ .set_rcvlowat = hvs_set_rcvlowat
};
static bool hvs_check_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index ec2c2afbf0d0..35863132f4f1 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -634,10 +634,7 @@ virtio_transport_notify_poll_in(struct vsock_sock *vsk,
size_t target,
bool *data_ready_now)
{
- if (vsock_stream_has_data(vsk))
- *data_ready_now = true;
- else
- *data_ready_now = false;
+ *data_ready_now = vsock_stream_has_data(vsk) >= target;
return 0;
}
@@ -1084,7 +1081,7 @@ virtio_transport_recv_connected(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RW:
virtio_transport_recv_enqueue(vsk, pkt);
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
virtio_transport_send_credit_update(vsk);
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index d69fc4b595ad..7c3a7db134b2 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -307,7 +307,7 @@ vmci_transport_handle_wrote(struct sock *sk,
struct vsock_sock *vsk = vsock_sk(sk);
PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
@@ -340,12 +340,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsock_stream_has_data(vsk)) {
+ if (vsock_stream_has_data(vsk) >= target) {
*data_ready_now = true;
} else {
- /* We can't read right now because there is nothing in the
- * queue. Ask for notifications when there is something to
- * read.
+ /* We can't read right now because there is not enough data
+ * in the queue. Ask for notifications when there is something
+ * to read.
*/
if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 0f36d7c45db3..e96a88d850a8 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -84,7 +84,7 @@ vmci_transport_handle_wrote(struct sock *sk,
bool bottom_half,
struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
static void vsock_block_update_write_window(struct sock *sk)
@@ -161,12 +161,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsock_stream_has_data(vsk)) {
+ if (vsock_stream_has_data(vsk) >= target) {
*data_ready_now = true;
} else {
- /* We can't read right now because there is nothing in the
- * queue. Ask for notifications when there is something to
- * read.
+ /* We can't read right now because there is not enough data
+ * in the queue. Ask for notifications when there is something
+ * to read.
*/
if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
@@ -282,7 +282,7 @@ vmci_transport_notify_pkt_recv_post_dequeue(
/* See the comment in
* vmci_transport_notify_pkt_send_post_enqueue().
*/
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
return err;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 067e9ea59e3b..8727765add88 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -722,6 +722,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
{
+ char prog_name[MAX_PROG_FULL_NAME];
unsigned int id = 0;
int fd, nb_fds = 0;
void *tmp;
@@ -754,12 +755,20 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
goto err_close_fd;
}
- if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
- (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) {
close(fd);
continue;
}
+ if (!tag) {
+ get_prog_full_name(&info, fd, prog_name,
+ sizeof(prog_name));
+ if (strncmp(nametag, prog_name, sizeof(prog_name))) {
+ close(fd);
+ continue;
+ }
+ }
+
if (nb_fds > 0) {
tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
if (!tmp) {
@@ -820,7 +829,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds)
NEXT_ARGP();
name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ if (strlen(name) > MAX_PROG_FULL_NAME - 1) {
p_err("can't parse name");
return -1;
}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 7ecabf7947fb..36cf0f1517c9 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -1147,7 +1147,7 @@ exit_free:
return res;
#else
/* Detection assumes user has specific privileges.
- * We do not use libpcap so let's approximate, and restrict usage to
+ * We do not use libcap so let's approximate, and restrict usage to
* root user only.
*/
if (geteuid()) {
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 451cefc2d0da..ccd7457f92bf 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -435,6 +435,16 @@ int main(int argc, char **argv)
setlinebuf(stdout);
+#ifdef USE_LIBCAP
+ /* Libcap < 2.63 hooks before main() to compute the number of
+ * capabilities of the running kernel, and doing so it calls prctl()
+ * which may fail and set errno to non-zero.
+ * Let's reset errno to make sure this does not interfere with the
+ * batch mode.
+ */
+ errno = 0;
+#endif
+
last_do_help = do_help;
pretty_output = false;
json_output = false;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 59a217ca2dfd..1d6085e15fc8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2573,10 +2573,12 @@ union bpf_attr {
* There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- * (room space is added or removed below the layer 2 header).
+ * (room space is added or removed between the layer 2 and
+ * layer 3 headers).
*
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * (room space is added or removed below the layer 3 header).
+ * (room space is added or removed between the layer 3 and
+ * layer 4 headers).
*
* The following flags are supported at this time:
*
@@ -3008,8 +3010,18 @@ union bpf_attr {
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
* **BPF_F_USER_BUILD_ID**
- * Collect buildid+offset instead of ips for user stack,
- * only valid if **BPF_F_USER_STACK** is also specified.
+ * Collect (build_id, file_offset) instead of ips for user
+ * stack, only valid if **BPF_F_USER_STACK** is also
+ * specified.
+ *
+ * *file_offset* is an offset relative to the beginning
+ * of the executable or shared object file backing the vma
+ * which the *ip* falls in. It is *not* an offset relative
+ * to that object's base address. Accordingly, it must be
+ * adjusted by adding (sh_addr - sh_offset), where
+ * sh_{addr,offset} correspond to the executable section
+ * containing *file_offset* in the object, for comparisons
+ * to symbols' st_value to be valid.
*
* **bpf_get_stack**\ () can collect up to
* **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
@@ -5331,6 +5343,18 @@ union bpf_attr {
* **-EACCES** if the SYN cookie is not valid.
*
* **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * u64 bpf_ktime_get_tai_ns(void)
+ * Description
+ * A nonsettable system-wide clock derived from wall-clock time but
+ * ignoring leap seconds. This clock does not experience
+ * discontinuities and backwards jumps caused by NTP inserting leap
+ * seconds as CLOCK_REALTIME does.
+ *
+ * See: **clock_gettime**\ (**CLOCK_TAI**)
+ * Return
+ * Current *ktime*.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5541,6 +5565,7 @@ union bpf_attr {
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
+ FN(ktime_get_tai_ns), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
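The BPF_F_USER_BUILD_ID documentation above describes how to turn the reported file_offset into something comparable with symbol st_value entries. A worked example of that adjustment, with made-up numbers standing in for the ELF section headers of the mapped object:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t file_offset = 0x21a40;   /* as reported with BPF_F_USER_BUILD_ID */
	uint64_t sh_offset   = 0x20000;   /* .text file offset  (Elf64_Shdr.sh_offset) */
	uint64_t sh_addr     = 0x22000;   /* .text virtual addr (Elf64_Shdr.sh_addr)   */

	/* Address comparable to st_value entries in .symtab/.dynsym: */
	uint64_t sym_addr = file_offset + (sh_addr - sh_offset);

	printf("0x%llx\n", (unsigned long long)sym_addr);  /* 0x23a40 */
	return 0;
}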
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index efcc06dafbd9..1d49a0352836 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -84,9 +84,7 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
return ensure_good_fd(fd);
}
-#define PROG_LOAD_ATTEMPTS 5
-
-static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
{
int fd;
@@ -107,7 +105,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int
*/
int probe_memcg_account(void)
{
- const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
struct bpf_insn insns[] = {
BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
BPF_EXIT_INSN(),
@@ -117,13 +115,13 @@ int probe_memcg_account(void)
int prog_fd;
/* attempt loading freplace trying to use custom BTF */
- memset(&attr, 0, prog_load_attr_sz);
+ memset(&attr, 0, attr_sz);
attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
attr.insns = ptr_to_u64(insns);
attr.insn_cnt = insn_cnt;
attr.license = ptr_to_u64("GPL");
- prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
+ prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
if (prog_fd >= 0) {
close(prog_fd);
return 1;
@@ -183,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type,
return libbpf_err(-EINVAL);
attr.map_type = map_type;
- if (map_name)
+ if (map_name && kernel_supports(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -234,6 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
const struct bpf_prog_load_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, fd_array);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -253,7 +252,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
if (attempts == 0)
attempts = PROG_LOAD_ATTEMPTS;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.prog_type = prog_type;
attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
@@ -263,7 +262,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
attr.kern_version = OPTS_GET(opts, kern_version, 0);
- if (prog_name)
+ if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
attr.license = ptr_to_u64(license);
@@ -316,7 +315,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.log_level = log_level;
}
- fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+ fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
if (fd >= 0)
return fd;
@@ -356,7 +355,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
break;
}
- fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+ fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
if (fd >= 0)
goto done;
}
@@ -370,7 +369,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.log_size = log_size;
attr.log_level = 1;
- fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+ fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
}
done:
/* free() doesn't affect errno, so we don't need to restore it */
@@ -382,127 +381,136 @@ done:
int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
attr.flags = flags;
- ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
- ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
attr.flags = flags;
- ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
- ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
attr.flags = flags;
- ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_delete_elem(int fd, const void *key)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
- ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.flags = flags;
- ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, next_key);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.next_key = ptr_to_u64(next_key);
- ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_map_freeze(int fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_fd = fd;
- ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
return libbpf_err_errno(ret);
}
@@ -511,13 +519,14 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
__u32 *count,
const struct bpf_map_batch_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, batch);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_map_batch_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.batch.map_fd = fd;
attr.batch.in_batch = ptr_to_u64(in_batch);
attr.batch.out_batch = ptr_to_u64(out_batch);
@@ -527,7 +536,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
attr.batch.flags = OPTS_GET(opts, flags, 0);
- ret = sys_bpf(cmd, &attr, sizeof(attr));
+ ret = sys_bpf(cmd, &attr, attr_sz);
*count = attr.batch.count;
return libbpf_err_errno(ret);
@@ -566,14 +575,15 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co
int bpf_obj_pin(int fd, const char *pathname)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.pathname = ptr_to_u64((void *)pathname);
attr.bpf_fd = fd;
- ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);
return libbpf_err_errno(ret);
}
@@ -584,17 +594,18 @@ int bpf_obj_get(const char *pathname)
int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
union bpf_attr attr;
int fd;
if (!OPTS_VALID(opts, bpf_obj_get_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.pathname = ptr_to_u64((void *)pathname);
attr.file_flags = OPTS_GET(opts, file_flags, 0);
- fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz);
return libbpf_err_errno(fd);
}
@@ -612,52 +623,50 @@ int bpf_prog_attach_opts(int prog_fd, int target_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
attr.attach_flags = OPTS_GET(opts, flags, 0);
attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
- ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz);
return libbpf_err_errno(ret);
}
-__attribute__((alias("bpf_prog_attach_opts")))
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
- enum bpf_attach_type type,
- const struct bpf_prog_attach_opts *opts);
-
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.target_fd = target_fd;
attr.attach_type = type;
- ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
- ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
return libbpf_err_errno(ret);
}
@@ -665,6 +674,7 @@ int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_create);
__u32 target_btf_id, iter_info_len;
union bpf_attr attr;
int fd, err;
@@ -683,7 +693,7 @@ int bpf_link_create(int prog_fd, int target_fd,
return libbpf_err(-EINVAL);
}
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_create.prog_fd = prog_fd;
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
@@ -727,7 +737,7 @@ int bpf_link_create(int prog_fd, int target_fd,
break;
}
proceed:
- fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz);
if (fd >= 0)
return fd;
/* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
@@ -763,44 +773,47 @@ proceed:
int bpf_link_detach(int link_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_detach);
union bpf_attr attr;
int ret;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_detach.link_fd = link_fd;
- ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, link_update);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_link_update_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_update.link_fd = link_fd;
attr.link_update.new_prog_fd = new_prog_fd;
attr.link_update.flags = OPTS_GET(opts, flags, 0);
attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
- ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
return libbpf_err_errno(ret);
}
int bpf_iter_create(int link_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, iter_create);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.iter_create.link_fd = link_fd;
- fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
}
@@ -808,13 +821,14 @@ int bpf_prog_query_opts(int target_fd,
enum bpf_attach_type type,
struct bpf_prog_query_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, query);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_prog_query_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.query.target_fd = target_fd;
attr.query.attach_type = type;
@@ -823,7 +837,7 @@ int bpf_prog_query_opts(int target_fd,
attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
- ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz);
OPTS_SET(opts, attach_flags, attr.query.attach_flags);
OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
@@ -852,13 +866,14 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, test);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_test_run_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.test.prog_fd = prog_fd;
attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
attr.test.cpu = OPTS_GET(opts, cpu, 0);
@@ -874,7 +889,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
- ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz);
OPTS_SET(opts, data_size_out, attr.test.data_size_out);
OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
@@ -886,13 +901,14 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int err;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.start_id = start_id;
- err = sys_bpf(cmd, &attr, sizeof(attr));
+ err = sys_bpf(cmd, &attr, attr_sz);
if (!err)
*next_id = attr.next_id;
@@ -921,80 +937,84 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
int bpf_prog_get_fd_by_id(__u32 id)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.prog_id = id;
- fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_map_get_fd_by_id(__u32 id)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.map_id = id;
- fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_btf_get_fd_by_id(__u32 id)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.btf_id = id;
- fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_link_get_fd_by_id(__u32 id)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.link_id = id;
- fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, info);
union bpf_attr attr;
int err;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.info.bpf_fd = bpf_fd;
attr.info.info_len = *info_len;
attr.info.info = ptr_to_u64(info);
- err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
-
+ err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
if (!err)
*info_len = attr.info.info_len;
-
return libbpf_err_errno(err);
}
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;
- fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
return libbpf_err_errno(fd);
}
@@ -1050,16 +1070,18 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
__u64 *probe_addr)
{
- union bpf_attr attr = {};
+ const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query);
+ union bpf_attr attr;
int err;
+ memset(&attr, 0, attr_sz);
attr.task_fd_query.pid = pid;
attr.task_fd_query.fd = fd;
attr.task_fd_query.flags = flags;
attr.task_fd_query.buf = ptr_to_u64(buf);
attr.task_fd_query.buf_len = *buf_len;
- err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz);
*buf_len = attr.task_fd_query.buf_len;
*prog_id = attr.task_fd_query.prog_id;
@@ -1072,30 +1094,32 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
int bpf_enable_stats(enum bpf_stats_type type)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, enable_stats);
union bpf_attr attr;
int fd;
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.enable_stats.type = type;
- fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr));
+ fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_prog_bind_map(int prog_fd, int map_fd,
const struct bpf_prog_bind_opts *opts)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map);
union bpf_attr attr;
int ret;
if (!OPTS_VALID(opts, bpf_prog_bind_opts))
return libbpf_err(-EINVAL);
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, attr_sz);
attr.prog_bind_map.prog_fd = prog_fd;
attr.prog_bind_map.map_fd = map_fd;
attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
- ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+ ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
return libbpf_err_errno(ret);
}
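The bpf.c hunks above all apply one pattern: size the attr by the last field a given BPF command consumes (offsetofend), zero only that prefix, and pass the same size to the syscall, so no uninitialized tail bytes of a grown union bpf_attr are handed to the kernel. A minimal sketch of that pattern for a hypothetical wrapper (my_map_lookup is illustrative, not part of the patch; offsetofend mirrors the kernel's helper):

#include <stddef.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

#ifndef offsetofend
#define offsetofend(TYPE, FIELD) \
	(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
#endif

/* Hypothetical wrapper: BPF_MAP_LOOKUP_ELEM uses fields up to 'flags',
 * so only that prefix of union bpf_attr is zeroed and passed along.
 */
static int my_map_lookup(int map_fd, const void *key, void *value)
{
	const size_t attr_sz = offsetofend(union bpf_attr, flags);
	union bpf_attr attr;

	memset(&attr, 0, attr_sz);
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)key;
	attr.value = (__u64)(unsigned long)value;

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
}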
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 43ca3aff2292..5fdb93da423b 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -426,7 +426,7 @@ struct pt_regs;
*/
#define BPF_PROG(name, args...) \
name(unsigned long long *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args); \
typeof(name(0)) name(unsigned long long *ctx) \
{ \
@@ -435,7 +435,7 @@ typeof(name(0)) name(unsigned long long *ctx) \
return ____##name(___bpf_ctx_cast(args)); \
_Pragma("GCC diagnostic pop") \
} \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args)
struct pt_regs;
@@ -460,7 +460,7 @@ struct pt_regs;
*/
#define BPF_KPROBE(name, args...) \
name(struct pt_regs *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
@@ -469,7 +469,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \
return ____##name(___bpf_kprobe_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
#define ___bpf_kretprobe_args0() ctx
@@ -484,7 +484,7 @@ ____##name(struct pt_regs *ctx, ##args)
*/
#define BPF_KRETPROBE(name, args...) \
name(struct pt_regs *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
@@ -540,7 +540,7 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
#define BPF_KSYSCALL(name, args...) \
name(struct pt_regs *ctx); \
extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
@@ -555,7 +555,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \
return ____##name(___bpf_syscall_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
#define BPF_KPROBE_SYSCALL BPF_KSYSCALL
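The bpf_tracing.h change only swaps the open-coded attribute for the __always_inline macro; libbpf's bpf_helpers.h commonly provides that macro roughly as below (a sketch, not quoted from this patch), so the two spellings are equivalent:

/* Rough equivalent of the macro used above, guarded so a toolchain or
 * kernel-header definition wins if one is already present.
 */
#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif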
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d14f1a52d7a..361131518d63 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1225,8 +1225,6 @@ int btf__load_into_kernel(struct btf *btf)
return btf_load_into_kernel(btf, NULL, 0, 0);
}
-int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
-
int btf__fd(const struct btf *btf)
{
return btf->fd;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 583760df83b4..ae543144ee30 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -116,7 +116,6 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b
LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 50d41815f431..3ad139285fad 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -223,13 +223,18 @@ __printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
va_list args;
+ int old_errno;
if (!__libbpf_pr)
return;
+ old_errno = errno;
+
va_start(args, format);
__libbpf_pr(level, format, args);
va_end(args);
+
+ errno = old_errno;
}
static void pr_perm_msg(int err)
@@ -412,6 +417,7 @@ struct bpf_program {
int fd;
bool autoload;
+ bool autoattach;
bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
@@ -591,7 +597,6 @@ struct elf_state {
size_t strtabidx;
struct elf_sec_desc *secs;
int sec_cnt;
- int maps_shndx;
int btf_maps_shndx;
__u32 btf_maps_sec_btf_id;
int text_shndx;
@@ -751,6 +756,8 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->autoload = true;
}
+ prog->autoattach = true;
+
/* inherit object's log_level */
prog->log_level = obj->log_level;
@@ -1272,7 +1279,6 @@ static struct bpf_object *bpf_object__new(const char *path,
*/
obj->efile.obj_buf = obj_buf;
obj->efile.obj_buf_sz = obj_buf_sz;
- obj->efile.maps_shndx = -1;
obj->efile.btf_maps_shndx = -1;
obj->efile.st_ops_shndx = -1;
obj->kconfig_map_idx = -1;
@@ -1642,6 +1648,10 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
sec_desc = &obj->efile.secs[sec_idx];
+ /* Skip recognized sections with size 0. */
+ if (!sec_desc->data || sec_desc->data->d_size == 0)
+ continue;
+
switch (sec_desc->sec_type) {
case SEC_DATA:
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
@@ -3359,7 +3369,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (err)
return err;
} else if (strcmp(name, "maps") == 0) {
- obj->efile.maps_shndx = idx;
+ pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
+ return -ENOTSUP;
} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
obj->efile.btf_maps_shndx = idx;
} else if (strcmp(name, BTF_ELF_SEC) == 0) {
@@ -3891,8 +3902,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
int shndx)
{
- return shndx == obj->efile.maps_shndx ||
- shndx == obj->efile.btf_maps_shndx;
+ return shndx == obj->efile.btf_maps_shndx;
}
static enum libbpf_map_type
@@ -4277,11 +4287,12 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
- struct bpf_map_info info = {};
+ struct bpf_map_info info;
__u32 len = sizeof(info), name_len;
int new_fd, err;
char *new_name;
+ memset(&info, 0, len);
err = bpf_obj_get_info_by_fd(fd, &info, &len);
if (err && errno == EINVAL)
err = bpf_get_map_info_from_fdinfo(fd, &info);
@@ -4408,14 +4419,23 @@ static int probe_fd(int fd)
static int probe_kern_prog_name(void)
{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int ret, insn_cnt = ARRAY_SIZE(insns);
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.license = ptr_to_u64("GPL");
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
+ libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
/* make sure loading with name works */
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);
+ ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
return probe_fd(ret);
}
@@ -4430,7 +4450,7 @@ static int probe_kern_global_data(void)
};
int ret, map, insn_cnt = ARRAY_SIZE(insns);
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
if (map < 0) {
ret = -errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4563,7 +4583,7 @@ static int probe_kern_array_mmap(void)
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
int fd;
- fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts);
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
return probe_fd(fd);
}
@@ -4610,7 +4630,7 @@ static int probe_prog_bind_map(void)
};
int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
if (map < 0) {
ret = -errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4814,13 +4834,12 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
{
- struct bpf_map_info map_info = {};
+ struct bpf_map_info map_info;
char msg[STRERR_BUFSIZE];
- __u32 map_info_len;
+ __u32 map_info_len = sizeof(map_info);
int err;
- map_info_len = sizeof(map_info);
-
+ memset(&map_info, 0, map_info_len);
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
if (err && errno == EINVAL)
err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
@@ -7244,8 +7263,6 @@ static int bpf_object_unload(struct bpf_object *obj)
return 0;
}
-int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload")));
-
static int bpf_object__sanitize_maps(struct bpf_object *obj)
{
struct bpf_map *m;
@@ -8298,6 +8315,16 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
return 0;
}
+bool bpf_program__autoattach(const struct bpf_program *prog)
+{
+ return prog->autoattach;
+}
+
+void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
+{
+ prog->autoattach = autoattach;
+}
+
const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
{
return prog->insns;
@@ -8978,11 +9005,12 @@ int libbpf_find_vmlinux_btf_id(const char *name,
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
{
- struct bpf_prog_info info = {};
+ struct bpf_prog_info info;
__u32 info_len = sizeof(info);
struct btf *btf;
int err;
+ memset(&info, 0, info_len);
err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
if (err) {
pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
@@ -9810,13 +9838,16 @@ static int determine_uprobe_retprobe_bit(void)
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
uint64_t offset, int pid, size_t ref_ctr_off)
{
- struct perf_event_attr attr = {};
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
char errmsg[STRERR_BUFSIZE];
int type, pfd;
if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
return -EINVAL;
+ memset(&attr, 0, attr_sz);
+
type = uprobe ? determine_uprobe_perf_type()
: determine_kprobe_perf_type();
if (type < 0) {
@@ -9837,7 +9868,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
}
attr.config |= 1 << bit;
}
- attr.size = sizeof(attr);
+ attr.size = attr_sz;
attr.type = type;
attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
@@ -9936,7 +9967,8 @@ static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retpro
static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
const char *kfunc_name, size_t offset, int pid)
{
- struct perf_event_attr attr = {};
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
char errmsg[STRERR_BUFSIZE];
int type, pfd, err;
@@ -9955,7 +9987,9 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
goto err_clean_legacy;
}
- attr.size = sizeof(attr);
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
attr.config = type;
attr.type = PERF_TYPE_TRACEPOINT;
@@ -10412,6 +10446,7 @@ static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retpro
static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
const char *binary_path, size_t offset, int pid)
{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
struct perf_event_attr attr;
int type, pfd, err;
@@ -10429,8 +10464,8 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
goto err_clean_legacy;
}
- memset(&attr, 0, sizeof(attr));
- attr.size = sizeof(attr);
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
attr.config = type;
attr.type = PERF_TYPE_TRACEPOINT;
@@ -10662,15 +10697,17 @@ static const char *arch_specific_lib_paths(void)
static int resolve_full_path(const char *file, char *result, size_t result_sz)
{
const char *search_paths[3] = {};
- int i;
+ int i, perm;
if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
search_paths[0] = getenv("LD_LIBRARY_PATH");
search_paths[1] = "/usr/lib64:/usr/lib";
search_paths[2] = arch_specific_lib_paths();
+ perm = R_OK;
} else {
search_paths[0] = getenv("PATH");
search_paths[1] = "/usr/bin:/usr/sbin";
+ perm = R_OK | X_OK;
}
for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
@@ -10689,8 +10726,8 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
if (!seg_len)
continue;
snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
- /* ensure it is an executable file/link */
- if (access(result, R_OK | X_OK) < 0)
+ /* ensure it has required permissions */
+ if (access(result, perm) < 0)
continue;
pr_debug("resolved '%s' to '%s'\n", file, result);
return 0;
@@ -10967,7 +11004,8 @@ static int determine_tracepoint_id(const char *tp_category,
static int perf_event_open_tracepoint(const char *tp_category,
const char *tp_name)
{
- struct perf_event_attr attr = {};
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
char errmsg[STRERR_BUFSIZE];
int tp_id, pfd, err;
@@ -10979,8 +11017,9 @@ static int perf_event_open_tracepoint(const char *tp_category,
return tp_id;
}
+ memset(&attr, 0, attr_sz);
attr.type = PERF_TYPE_TRACEPOINT;
- attr.size = sizeof(attr);
+ attr.size = attr_sz;
attr.config = tp_id;
pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
@@ -11600,12 +11639,15 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
void *ctx,
const struct perf_buffer_opts *opts)
{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
struct perf_buffer_params p = {};
- struct perf_event_attr attr = {};
+ struct perf_event_attr attr;
if (!OPTS_VALID(opts, perf_buffer_opts))
return libbpf_err_ptr(-EINVAL);
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
@@ -12328,7 +12370,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
struct bpf_program *prog = *s->progs[i].prog;
struct bpf_link **link = s->progs[i].link;
- if (!prog->autoload)
+ if (!prog->autoload || !prog->autoattach)
continue;
/* auto-attaching not supported for this program */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 61493c4cddac..88a1ac34b12a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -260,6 +260,8 @@ LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
+LIBBPF_API bool bpf_program__autoattach(const struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach);
struct bpf_insn;
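bpf_program__autoattach()/bpf_program__set_autoattach() let a caller keep a program loaded while opting it out of skeleton auto-attach. A minimal usage sketch, assuming a hypothetical skeleton my_skel with a program named optional_prog (the selftest added further below exercises the same flow with a real skeleton):

#include "my_skel.skel.h"	/* hypothetical generated skeleton header */

static int load_without_optional_prog(void)
{
	struct my_skel *skel;
	int err;

	skel = my_skel__open_and_load();
	if (!skel)
		return -1;

	/* keep optional_prog loaded, but skip it during auto-attach */
	bpf_program__set_autoattach(skel->progs.optional_prog, false);

	err = my_skel__attach(skel);
	if (err)
		my_skel__destroy(skel);	/* caller keeps skel on success */
	return err;
}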
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 119e6e1ea7f1..2b928dc21af0 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -358,6 +358,8 @@ LIBBPF_1.0.0 {
bpf_obj_get_opts;
bpf_prog_query_opts;
bpf_program__attach_ksyscall;
+ bpf_program__autoattach;
+ bpf_program__set_autoattach;
btf__add_enum64;
btf__add_enum64_value;
libbpf_bpf_attach_type_str;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 4135ae0a2bc3..377642ff51fc 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -573,4 +573,7 @@ static inline bool is_pow_of_2(size_t x)
return x && (x & (x - 1)) == 0;
}
+#define PROG_LOAD_ATTEMPTS 5
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
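PROG_LOAD_ATTEMPTS and the sys_bpf_prog_load() prototype are exported internally so feature probes such as probe_kern_prog_name() above can reuse libbpf's retrying program load. A rough sketch of that behaviour, assuming it mirrors tools/lib/bpf/bpf.c (retry while the kernel returns EAGAIN):

#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Assumed to mirror the real helper: retry BPF_PROG_LOAD on EAGAIN,
 * up to 'attempts' tries (PROG_LOAD_ATTEMPTS above).
 */
static int sys_bpf_prog_load_sketch(union bpf_attr *attr, unsigned int size,
				    int attempts)
{
	int fd;

	do {
		fd = syscall(__NR_bpf, BPF_PROG_LOAD, attr, size);
	} while (fd < 0 && errno == EAGAIN && --attempts > 0);

	return fd;
}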
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 5b7e0155db6a..1e1be467bede 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -125,6 +125,8 @@ struct bpf_map;
struct btf;
struct btf_ext;
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 0b5398786bf3..6d495656f554 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -193,7 +193,7 @@ static int probe_map_create(enum bpf_map_type map_type)
LIBBPF_OPTS(bpf_map_create_opts, opts);
int key_size, value_size, max_entries;
__u32 btf_key_type_id = 0, btf_value_type_id = 0;
- int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
+ int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0;
key_size = sizeof(__u32);
value_size = sizeof(__u32);
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 6c013168032d..35104580870c 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -587,11 +587,12 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
{
- struct bpf_prog_info info = {};
+ struct bpf_prog_info info;
__u32 info_len = sizeof(info);
char name[256];
int len, ret;
+ memset(&info, 0, info_len);
ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
if (ret < 0)
return ret;
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 70adf7b119b9..00c5f94b43be 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -285,6 +285,8 @@ static inline int skel_link_create(int prog_fd, int target_fd,
static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
@@ -302,7 +304,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
goto out;
}
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, prog_load_attr_sz);
attr.prog_type = BPF_PROG_TYPE_SYSCALL;
attr.insns = (long) opts->insns;
attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
@@ -313,18 +315,18 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
attr.log_size = opts->ctx->log_size;
attr.log_buf = opts->ctx->log_buf;
attr.prog_flags = BPF_F_SLEEPABLE;
- err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
if (prog_fd < 0) {
opts->errstr = "failed to load loader prog";
set_err;
goto out;
}
- memset(&attr, 0, sizeof(attr));
+ memset(&attr, 0, test_run_attr_sz);
attr.test.prog_fd = prog_fd;
attr.test.ctx_in = (long) opts->ctx;
attr.test.ctx_size_in = opts->ctx->sz;
- err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
+ err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz);
if (err < 0 || (int)attr.test.retval < 0) {
opts->errstr = "failed to execute loader prog";
if (err < 0) {
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 4f2adc0bd6ca..fdfd235e52c4 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -232,7 +232,7 @@ long bpf_usdt_cookie(struct pt_regs *ctx)
*/
#define BPF_USDT(name, args...) \
name(struct pt_regs *ctx); \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args); \
typeof(name(0)) name(struct pt_regs *ctx) \
{ \
@@ -241,7 +241,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \
return ____##name(___bpf_usdt_args(args)); \
_Pragma("GCC diagnostic pop") \
} \
-static __attribute__((always_inline)) typeof(name(0)) \
+static __always_inline typeof(name(0)) \
____##name(struct pt_regs *ctx, ##args)
#endif /* __USDT_BPF_H__ */
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index e33cab34d22f..9d8de15e725e 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -43,7 +43,7 @@ test_bpffs # bpffs test failed 255
test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline)
test_ima # failed to auto-attach program 'ima': -524 (trampoline)
test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline)
-test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?)
+test_lsm # attach unexpected error: -524 (trampoline)
test_overhead # attach_fentry unexpected error: -524 (trampoline)
test_profiler # unknown func bpf_probe_read_str#45 (overlapping)
timer # failed to auto-attach program 'test1': -524 (trampoline)
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index fabf0c014349..3fc46f9cfb22 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -50,9 +50,11 @@ CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_CONNMARK=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NETFILTER_XT_TARGET_CT=y
CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
CONFIG_RC_CORE=y
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 0b899d2d8ea7..9566d9d2f6ee 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -6,19 +6,19 @@
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
/* uprobe attach point */
-static void trigger_func(void)
+static noinline void trigger_func(void)
{
asm volatile ("");
}
/* attach point for byname uprobe */
-static void trigger_func2(void)
+static noinline void trigger_func2(void)
{
asm volatile ("");
}
/* attach point for byname sleepable uprobe */
-static void trigger_func3(void)
+static noinline void trigger_func3(void)
{
asm volatile ("");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/autoattach.c b/tools/testing/selftests/bpf/prog_tests/autoattach.c
new file mode 100644
index 000000000000..dc5e01d279bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoattach.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include "test_autoattach.skel.h"
+
+void test_autoattach(void)
+{
+ struct test_autoattach *skel;
+
+ skel = test_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto cleanup;
+
+ /* disable auto-attach for prog2 */
+ bpf_program__set_autoattach(skel->progs.prog2, false);
+ ASSERT_TRUE(bpf_program__autoattach(skel->progs.prog1), "autoattach_prog1");
+ ASSERT_FALSE(bpf_program__autoattach(skel->progs.prog2), "autoattach_prog2");
+ if (!ASSERT_OK(test_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_TRUE(skel->bss->prog1_called, "attached_prog1");
+ ASSERT_FALSE(skel->bss->prog2_called, "attached_prog2");
+
+cleanup:
+ test_autoattach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 2974b44f80fa..2be2d61954bc 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -13,7 +13,7 @@
#include "kprobe_multi.skel.h"
/* uprobe attach point */
-static void trigger_func(void)
+static noinline void trigger_func(void)
{
asm volatile ("");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 7a74a1579076..544bf90ac2a7 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -24,10 +24,34 @@ enum {
TEST_TC_BPF,
};
+#define TIMEOUT_MS 3000
+
+static int connect_to_server(int srv_fd)
+{
+ int fd = -1;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ goto out;
+
+ if (!ASSERT_EQ(connect_fd_to_fd(fd, srv_fd, TIMEOUT_MS), 0, "connect_fd_to_fd")) {
+ close(fd);
+ fd = -1;
+ }
+out:
+ return fd;
+}
+
static void test_bpf_nf_ct(int mode)
{
+ const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
+ int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
+ struct sockaddr_in peer_addr = {};
struct test_bpf_nf *skel;
int prog_fd, err;
+ socklen_t len;
+ u16 srv_port;
+ char cmd[64];
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
@@ -38,6 +62,32 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
return;
+ /* Enable connection tracking */
+ snprintf(cmd, sizeof(cmd), iptables, "-A");
+ if (!ASSERT_OK(system(cmd), "iptables"))
+ goto end;
+
+ srv_port = (mode == TEST_XDP) ? 5005 : 5006;
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto end;
+
+ client_fd = connect_to_server(srv_fd);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
+ goto end;
+
+ len = sizeof(peer_addr);
+ srv_client_fd = accept(srv_fd, (struct sockaddr *)&peer_addr, &len);
+ if (!ASSERT_GE(srv_client_fd, 0, "accept"))
+ goto end;
+ if (!ASSERT_EQ(len, sizeof(struct sockaddr_in), "sockaddr len"))
+ goto end;
+
+ skel->bss->saddr = peer_addr.sin_addr.s_addr;
+ skel->bss->sport = peer_addr.sin_port;
+ skel->bss->daddr = peer_addr.sin_addr.s_addr;
+ skel->bss->dport = htons(srv_port);
+
if (mode == TEST_XDP)
prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
else
@@ -63,7 +113,17 @@ static void test_bpf_nf_ct(int mode)
ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
/* expected status is IPS_SEEN_REPLY */
ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
+ ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
+ ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
end:
+ if (srv_client_fd != -1)
+ close(srv_client_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ snprintf(cmd, sizeof(cmd), iptables, "-D");
+ system(cmd);
test_bpf_nf__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 3c7aa82b98e2..bcf80b9f7c27 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -22,7 +22,8 @@ static struct {
{"add_dynptr_to_map2", "invalid indirect read from stack"},
{"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"},
{"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"},
- {"data_slice_use_after_release", "invalid mem access 'scalar'"},
+ {"data_slice_use_after_release1", "invalid mem access 'scalar'"},
+ {"data_slice_use_after_release2", "invalid mem access 'scalar'"},
{"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"},
{"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"},
{"invalid_helper1", "invalid indirect read from stack"},
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index c00eb974eb85..351fafa006fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -5,6 +5,9 @@
#include "kfunc_call_test.lskel.h"
#include "kfunc_call_test_subprog.skel.h"
#include "kfunc_call_test_subprog.lskel.h"
+#include "kfunc_call_destructive.skel.h"
+
+#include "cap_helpers.h"
static void test_main(void)
{
@@ -86,6 +89,36 @@ static void test_subprog_lskel(void)
kfunc_call_test_subprog_lskel__destroy(skel);
}
+static int test_destructive_open_and_load(void)
+{
+ struct kfunc_call_destructive *skel;
+ int err;
+
+ skel = kfunc_call_destructive__open();
+ if (!ASSERT_OK_PTR(skel, "prog_open"))
+ return -1;
+
+ err = kfunc_call_destructive__load(skel);
+
+ kfunc_call_destructive__destroy(skel);
+
+ return err;
+}
+
+static void test_destructive(void)
+{
+ __u64 save_caps = 0;
+
+ ASSERT_OK(test_destructive_open_and_load(), "succesful_load");
+
+ if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps"))
+ return;
+
+ ASSERT_EQ(test_destructive_open_and_load(), -13, "no_caps_failure");
+
+ cap_enable_effective(save_caps, NULL);
+}
+
void test_kfunc_call(void)
{
if (test__start_subtest("main"))
@@ -96,4 +129,7 @@ void test_kfunc_call(void)
if (test__start_subtest("subprog_lskel"))
test_subprog_lskel();
+
+ if (test__start_subtest("destructive"))
+ test_destructive();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
index 61935e7e056a..f000734a3d1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -4,7 +4,7 @@
#include "test_task_pt_regs.skel.h"
/* uprobe attach point */
-static void trigger_func(void)
+static noinline void trigger_func(void)
{
asm volatile ("");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c
new file mode 100644
index 000000000000..a31119823666
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_time_tai.skel.h"
+
+#include <time.h>
+#include <stdint.h>
+
+#define TAI_THRESHOLD 1000000000ULL /* 1s */
+#define NSEC_PER_SEC 1000000000ULL
+
+static __u64 ts_to_ns(const struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+void test_time_tai(void)
+{
+ struct __sk_buff skb = {
+ .cb[0] = 0,
+ .cb[1] = 0,
+ .tstamp = 0,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ );
+ struct test_time_tai *skel;
+ struct timespec now_tai;
+ __u64 ts1, ts2, now;
+ int ret, prog_fd;
+
+ /* Open and load */
+ skel = test_time_tai__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tai_open"))
+ return;
+
+ /* Run test program */
+ prog_fd = bpf_program__fd(skel->progs.time_tai);
+ ret = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(ret, "test_run");
+
+ /* Retrieve generated TAI timestamps */
+ ts1 = skb.tstamp;
+ ts2 = skb.cb[0] | ((__u64)skb.cb[1] << 32);
+
+ /* TAI != 0 */
+ ASSERT_NEQ(ts1, 0, "tai_ts1");
+ ASSERT_NEQ(ts2, 0, "tai_ts2");
+
+ /* TAI is moving forward only */
+ ASSERT_GT(ts2, ts1, "tai_forward");
+
+ /* Check for future */
+ ret = clock_gettime(CLOCK_TAI, &now_tai);
+ ASSERT_EQ(ret, 0, "tai_gettime");
+ now = ts_to_ns(&now_tai);
+
+ ASSERT_TRUE(now > ts1, "tai_future_ts1");
+ ASSERT_TRUE(now > ts2, "tai_future_ts2");
+
+ /* Check for reasonable range */
+ ASSERT_TRUE(now - ts1 < TAI_THRESHOLD, "tai_range_ts1");
+ ASSERT_TRUE(now - ts2 < TAI_THRESHOLD, "tai_range_ts2");
+
+ test_time_tai__destroy(skel);
+}
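The test above expects CLOCK_TAI timestamps to move forward and land within one second of the current time. A small hedged userspace check of the same clock the bpf_ktime_get_tai_ns() helper reads; the printed difference is the kernel's TAI offset (typically the leap-second count, or 0 when never configured):

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TAI
#define CLOCK_TAI 11	/* UAPI clock id, in case libc headers lack it */
#endif

int main(void)
{
	struct timespec tai, real;

	clock_gettime(CLOCK_TAI, &tai);
	clock_gettime(CLOCK_REALTIME, &real);

	/* TAI differs from realtime by the kernel's TAI offset */
	printf("CLOCK_TAI - CLOCK_REALTIME = %lld s\n",
	       (long long)(tai.tv_sec - real.tv_sec));
	return 0;
}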
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 0a26c243e6e9..b0f08ff024fb 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -65,7 +65,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr)
/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding
* bpf_ringbuf_submit/discard_dynptr call
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int ringbuf_missing_release1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -77,7 +77,7 @@ int ringbuf_missing_release1(void *ctx)
return 0;
}
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int ringbuf_missing_release2(void *ctx)
{
struct bpf_dynptr ptr1, ptr2;
@@ -112,7 +112,7 @@ static int missing_release_callback_fn(__u32 index, void *data)
}
/* Any dynptr initialized within a callback must have bpf_dynptr_put called */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int ringbuf_missing_release_callback(void *ctx)
{
bpf_loop(10, missing_release_callback_fn, NULL, 0);
@@ -120,7 +120,7 @@ int ringbuf_missing_release_callback(void *ctx)
}
/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int ringbuf_release_uninit_dynptr(void *ctx)
{
struct bpf_dynptr ptr;
@@ -132,7 +132,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)
}
/* A dynptr can't be used after it has been invalidated */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int use_after_invalid(void *ctx)
{
struct bpf_dynptr ptr;
@@ -151,7 +151,7 @@ int use_after_invalid(void *ctx)
}
/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int ringbuf_invalid_api(void *ctx)
{
struct bpf_dynptr ptr;
@@ -173,7 +173,7 @@ done:
}
/* Can't add a dynptr to a map */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int add_dynptr_to_map1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -190,7 +190,7 @@ int add_dynptr_to_map1(void *ctx)
}
/* Can't add a struct with an embedded dynptr to a map */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int add_dynptr_to_map2(void *ctx)
{
struct test_info x;
@@ -207,7 +207,7 @@ int add_dynptr_to_map2(void *ctx)
}
/* A data slice can't be accessed out of bounds */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int data_slice_out_of_bounds_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
@@ -227,7 +227,7 @@ done:
return 0;
}
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int data_slice_out_of_bounds_map_value(void *ctx)
{
__u32 key = 0, map_val;
@@ -247,8 +247,8 @@ int data_slice_out_of_bounds_map_value(void *ctx)
}
/* A data slice can't be used after it has been released */
-SEC("?raw_tp/sys_nanosleep")
-int data_slice_use_after_release(void *ctx)
+SEC("?raw_tp")
+int data_slice_use_after_release1(void *ctx)
{
struct bpf_dynptr ptr;
struct sample *sample;
@@ -272,8 +272,44 @@ done:
return 0;
}
+/* A data slice can't be used after it has been released.
+ *
+ * This tests the case where the data slice tracks a dynptr (ptr2)
+ * that is at a non-zero offset from the frame pointer (ptr1 is at fp,
+ * ptr2 is at fp - 16).
+ */
+SEC("?raw_tp")
+int data_slice_use_after_release2(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+ sample = bpf_dynptr_data(&ptr2, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 23;
+
+ bpf_ringbuf_submit_dynptr(&ptr2, 0);
+
+ /* this should fail */
+ sample->pid = 23;
+
+ bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ return 0;
+}
+
/* A data slice must be first checked for NULL */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int data_slice_missing_null_check1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -293,7 +329,7 @@ int data_slice_missing_null_check1(void *ctx)
}
/* A data slice can't be dereferenced if it wasn't checked for null */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int data_slice_missing_null_check2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -315,7 +351,7 @@ done:
/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a
* dynptr argument
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_helper1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -329,7 +365,7 @@ int invalid_helper1(void *ctx)
}
/* A dynptr can't be passed into a helper function at a non-zero offset */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_helper2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -344,7 +380,7 @@ int invalid_helper2(void *ctx)
}
/* A bpf_dynptr is invalidated if it's been written into */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_write1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -365,7 +401,7 @@ int invalid_write1(void *ctx)
* A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed
* offset
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_write2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -388,7 +424,7 @@ int invalid_write2(void *ctx)
* A bpf_dynptr can't be used as a dynptr if it has been written into at a
* non-const offset
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_write3(void *ctx)
{
struct bpf_dynptr ptr;
@@ -419,7 +455,7 @@ static int invalid_write4_callback(__u32 index, void *data)
/* If the dynptr is written into in a callback function, it should
* be invalidated as a dynptr
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_write4(void *ctx)
{
struct bpf_dynptr ptr;
@@ -436,7 +472,7 @@ int invalid_write4(void *ctx)
/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */
struct bpf_dynptr global_dynptr;
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int global(void *ctx)
{
/* this should fail */
@@ -448,7 +484,7 @@ int global(void *ctx)
}
/* A direct read should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_read1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -464,7 +500,7 @@ int invalid_read1(void *ctx)
}
/* A direct read at an offset should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_read2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -479,7 +515,7 @@ int invalid_read2(void *ctx)
}
/* A direct read at an offset into the lower stack slot should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_read3(void *ctx)
{
struct bpf_dynptr ptr1, ptr2;
@@ -505,7 +541,7 @@ static int invalid_read4_callback(__u32 index, void *data)
}
/* A direct read within a callback function should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_read4(void *ctx)
{
struct bpf_dynptr ptr;
@@ -520,7 +556,7 @@ int invalid_read4(void *ctx)
}
/* Initializing a dynptr on an offset should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int invalid_offset(void *ctx)
{
struct bpf_dynptr ptr;
@@ -534,7 +570,7 @@ int invalid_offset(void *ctx)
}
/* Can't release a dynptr twice */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int release_twice(void *ctx)
{
struct bpf_dynptr ptr;
@@ -560,7 +596,7 @@ static int release_twice_callback_fn(__u32 index, void *data)
/* Test that releasing a dynptr twice, where one of the releases happens
 * within a callback function, fails
*/
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int release_twice_callback(void *ctx)
{
struct bpf_dynptr ptr;
@@ -575,7 +611,7 @@ int release_twice_callback(void *ctx)
}
/* Reject unsupported local mem types for dynptr_from_mem API */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
int dynptr_from_mem_invalid_api(void *ctx)
{
struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
new file mode 100644
index 000000000000..767472bc5a97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_kfunc_call_test_destructive(void) __ksym;
+
+SEC("tc")
+int kfunc_destructive_test(void)
+{
+ bpf_kfunc_call_test_destructive();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index 33694ef8acfa..d8d8af623bc2 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -4,6 +4,7 @@
* Copyright 2020 Google LLC.
*/
+#include "bpf_misc.h"
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -160,7 +161,7 @@ int BPF_PROG(test_task_free, struct task_struct *task)
int copy_test = 0;
-SEC("fentry.s/__x64_sys_setdomainname")
+SEC("fentry.s/" SYS_PREFIX "sys_setdomainname")
int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
{
void *ptr = (void *)PT_REGS_PARM1(regs);
diff --git a/tools/testing/selftests/bpf/progs/test_autoattach.c b/tools/testing/selftests/bpf/progs/test_autoattach.c
new file mode 100644
index 000000000000..11a44493ebce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoattach.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 22d0ac8709b4..5a3a80f751c4 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -28,14 +28,14 @@ static void update(void *ctx, __u64 *res)
*res |= bpf_get_attach_cookie(ctx);
}
-SEC("kprobe/sys_nanosleep")
+SEC("kprobe")
int handle_kprobe(struct pt_regs *ctx)
{
update(ctx, &kprobe_res);
return 0;
}
-SEC("kretprobe/sys_nanosleep")
+SEC("kretprobe")
int handle_kretprobe(struct pt_regs *ctx)
{
update(ctx, &kretprobe_res);
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 196cd8dfe42a..2722441850cc 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -23,6 +23,12 @@ int test_insert_entry = -EAFNOSUPPORT;
int test_succ_lookup = -ENOENT;
u32 test_delta_timeout = 0;
u32 test_status = 0;
+__be32 saddr = 0;
+__be16 sport = 0;
+__be32 daddr = 0;
+__be16 dport = 0;
+int test_exist_lookup = -ENOENT;
+u32 test_exist_lookup_mark = 0;
struct nf_conn;
@@ -160,6 +166,21 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
}
test_alloc_entry = 0;
}
+
+ bpf_tuple.ipv4.saddr = saddr;
+ bpf_tuple.ipv4.daddr = daddr;
+ bpf_tuple.ipv4.sport = sport;
+ bpf_tuple.ipv4.dport = dport;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ test_exist_lookup = 0;
+ if (ct->mark == 42)
+ test_exist_lookup_mark = 43;
+ bpf_ct_release(ct);
+ } else {
+ test_exist_lookup = opts_def.error;
+ }
}
SEC("xdp")
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
index 20ef9d433b97..5715c569ec03 100644
--- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -72,7 +72,7 @@ int tp_timer(void *ctx)
return 0;
}
-SEC("?kprobe/sys_nanosleep")
+SEC("?kprobe")
int kprobe_timer(void *ctx)
{
timer_work();
@@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx)
return 0;
}
-SEC("?kprobe/sys_nanosleep")
+SEC("?kprobe")
int kprobe_spin_lock(void *ctx)
{
spin_lock_work();
diff --git a/tools/testing/selftests/bpf/progs/test_time_tai.c b/tools/testing/selftests/bpf/progs/test_time_tai.c
new file mode 100644
index 000000000000..7ea0863f3ddb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_time_tai.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tc")
+int time_tai(struct __sk_buff *skb)
+{
+ __u64 ts1, ts2;
+
+ /* Get TAI timestamps */
+ ts1 = bpf_ktime_get_tai_ns();
+ ts2 = bpf_ktime_get_tai_ns();
+
+ /* Save TAI timestamps (Note: skb->hwtstamp is read-only) */
+ skb->tstamp = ts1;
+ skb->cb[0] = ts2 & 0xffffffff;
+ skb->cb[1] = ts2 >> 32;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index b86ae4a2e5c5..a29aa05ebb3e 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -307,6 +307,20 @@ update_kconfig()
fi
}
+catch()
+{
+ local exit_code=$1
+ local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
+ # This is just a cleanup and the directory may
+ # have already been unmounted. So, don't let this
+ # clobber the error code we intend to return.
+ unmount_image || true
+ if [[ -f "${exit_status_file}" ]]; then
+ exit_code="$(cat ${exit_status_file})"
+ fi
+ exit ${exit_code}
+}
+
main()
{
local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
@@ -319,7 +333,7 @@ main()
local exit_command="poweroff -f"
local debug_shell="no"
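+ # A leading ':' in the optstring puts getopts in silent error-reporting mode.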
- while getopts 'hskid:j:' opt; do
+ while getopts ':hskid:j:' opt; do
case ${opt} in
i)
update_image="yes"
@@ -353,6 +367,8 @@ main()
done
shift $((OPTIND -1))
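+ # Ensure catch() runs on every exit path: it unmounts the image and
+ # propagates any recorded exit status.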
+ trap 'catch "$?"' EXIT
+
if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then
echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
else
@@ -409,20 +425,4 @@ main()
fi
}
-catch()
-{
- local exit_code=$1
- local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
- # This is just a cleanup and the directory may
- # have already been unmounted. So, don't let this
- # clobber the error code we intend to return.
- unmount_image || true
- if [[ -f "${exit_status_file}" ]]; then
- exit_code="$(cat ${exit_status_file})"
- fi
- exit ${exit_code}
-}
-
-trap 'catch "$?"' EXIT
-
main "$@"
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 74d56d971baf..14b4737b223c 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -244,6 +244,11 @@ static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
}
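+/* An ifobject's UMEM pointer is only non-NULL once its socket/UMEM has been
+ * created; the poll-timeout tests check the peer's UMEM to decide whether a
+ * timeout is expected.
+ */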
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+ return !!ifobj->umem->umem;
+}
+
static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
{
udp_hdr->check = 0;
@@ -817,12 +822,13 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
return TEST_PASS;
}
-static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
+static int receive_pkts(struct test_spec *test, struct pollfd *fds)
{
- struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream;
u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
- struct pkt_stream *pkt_stream = ifobj->pkt_stream;
- struct xsk_socket_info *xsk = ifobj->xsk;
+ struct xsk_socket_info *xsk = test->ifobj_rx->xsk;
+ struct ifobject *ifobj = test->ifobj_rx;
struct xsk_umem_info *umem = xsk->umem;
struct pkt *pkt;
int ret;
@@ -843,17 +849,28 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
}
kick_rx(xsk);
+ if (ifobj->use_poll) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_FAILURE;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(-ret);
}
- continue;
+
+ if (!(fds->revents & POLLIN))
+ continue;
}
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd)
+ continue;
+
if (ifobj->use_fill_ring) {
ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
while (ret != rcvd) {
@@ -900,13 +917,35 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
return TEST_PASS;
}
-static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
+static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds,
+ bool timeout)
{
struct xsk_socket_info *xsk = ifobject->xsk;
- u32 i, idx, valid_pkts = 0;
+ bool use_poll = ifobject->use_poll;
+ u32 i, idx = 0, ret, valid_pkts = 0;
+
+ while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
+ if (use_poll) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (timeout) {
+ if (ret < 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, ret);
+ return TEST_FAILURE;
+ }
+ if (ret == 0)
+ return TEST_PASS;
+ break;
+ }
+ if (ret <= 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
- while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
complete_pkts(xsk, BATCH_SIZE);
+ }
for (i = 0; i < BATCH_SIZE; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
@@ -933,11 +972,27 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
xsk_ring_prod__submit(&xsk->tx, i);
xsk->outstanding_tx += valid_pkts;
- if (complete_pkts(xsk, i))
- return TEST_FAILURE;
- usleep(10);
- return TEST_PASS;
+ if (use_poll) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret <= 0) {
+ if (ret == 0 && timeout)
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
+
+ if (!timeout) {
+ if (complete_pkts(xsk, i))
+ return TEST_FAILURE;
+
+ usleep(10);
+ return TEST_PASS;
+ }
+
+ return TEST_CONTINUE;
}
static void wait_for_tx_completion(struct xsk_socket_info *xsk)
@@ -948,29 +1003,19 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
{
+ bool timeout = !is_umem_valid(test->ifobj_rx);
struct pollfd fds = { };
- u32 pkt_cnt = 0;
+ u32 pkt_cnt = 0, ret;
fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
fds.events = POLLOUT;
while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
- int err;
-
- if (ifobject->use_poll) {
- int ret;
-
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret <= 0)
- continue;
-
- if (!(fds.revents & POLLOUT))
- continue;
- }
-
- err = __send_pkts(ifobject, &pkt_cnt);
- if (err || test->fail)
+ ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout);
+ if ((ret || test->fail) && !timeout)
return TEST_FAILURE;
+ else if (ret == TEST_PASS && timeout)
+ return ret;
}
wait_for_tx_completion(ifobject->xsk);
@@ -1235,7 +1280,7 @@ static void *worker_testapp_validate_rx(void *arg)
pthread_barrier_wait(&barr);
- err = receive_pkts(ifobject, &fds);
+ err = receive_pkts(test, &fds);
if (!err && ifobject->validation_func)
err = ifobject->validation_func(ifobject);
@@ -1251,6 +1296,33 @@ static void *worker_testapp_validate_rx(void *arg)
pthread_exit(NULL);
}
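+/* Run only one of the Rx/Tx workers so that the poll-timeout paths can be
+ * exercised without a fully initialized peer.
+ */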
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj,
+ enum test_type type)
+{
+ pthread_t t0;
+
+ if (pthread_barrier_init(&barr, NULL, 2))
+ exit_with_error(errno);
+
+ test->current_step++;
+ if (type == TEST_TYPE_POLL_RXQ_TMOUT)
+ pkt_stream_reset(ifobj->pkt_stream);
+ pkts_in_flight = 0;
+
+ /* Spawn thread */
+ pthread_create(&t0, NULL, ifobj->func_ptr, test);
+
+ if (type != TEST_TYPE_POLL_TXQ_TMOUT)
+ pthread_barrier_wait(&barr);
+
+ if (pthread_barrier_destroy(&barr))
+ exit_with_error(errno);
+
+ pthread_join(t0, NULL);
+
+ return !!test->fail;
+}
+
static int testapp_validate_traffic(struct test_spec *test)
{
struct ifobject *ifobj_tx = test->ifobj_tx;
@@ -1548,12 +1620,30 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
pkt_stream_restore_default(test);
break;
- case TEST_TYPE_POLL:
- test->ifobj_tx->use_poll = true;
+ case TEST_TYPE_RX_POLL:
test->ifobj_rx->use_poll = true;
- test_spec_set_name(test, "POLL");
+ test_spec_set_name(test, "POLL_RX");
testapp_validate_traffic(test);
break;
+ case TEST_TYPE_TX_POLL:
+ test->ifobj_tx->use_poll = true;
+ test_spec_set_name(test, "POLL_TX");
+ testapp_validate_traffic(test);
+ break;
+ case TEST_TYPE_POLL_TXQ_TMOUT:
+ test_spec_set_name(test, "POLL_TXQ_FULL");
+ test->ifobj_tx->use_poll = true;
+ /* Create an invalid frame by setting umem frame_size and pkt length to 2048 */
+ test->ifobj_tx->umem->frame_size = 2048;
+ pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
+ testapp_validate_traffic_single_thread(test, test->ifobj_tx, type);
+ pkt_stream_restore_default(test);
+ break;
+ case TEST_TYPE_POLL_RXQ_TMOUT:
+ test_spec_set_name(test, "POLL_RXQ_EMPTY");
+ test->ifobj_rx->use_poll = true;
+ testapp_validate_traffic_single_thread(test, test->ifobj_rx, type);
+ break;
case TEST_TYPE_ALIGNED_INV_DESC:
test_spec_set_name(test, "ALIGNED_INV_DESC");
testapp_invalid_desc(test);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 3d17053f98e5..ee97576757a9 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -27,6 +27,7 @@
#define TEST_PASS 0
#define TEST_FAILURE -1
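+/* Not a final verdict: tell the caller to keep the test running */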
+#define TEST_CONTINUE 1
#define MAX_INTERFACES 2
#define MAX_INTERFACE_NAME_CHARS 7
#define MAX_INTERFACES_NAMESPACE_CHARS 10
@@ -48,7 +49,7 @@
#define SOCK_RECONF_CTR 10
#define BATCH_SIZE 64
#define POLL_TMOUT 1000
-#define RECV_TMOUT 3
+#define THREAD_TMOUT 3
#define DEFAULT_PKT_CNT (4 * 1024)
#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
@@ -68,7 +69,10 @@ enum test_type {
TEST_TYPE_RUN_TO_COMPLETION,
TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
- TEST_TYPE_POLL,
+ TEST_TYPE_RX_POLL,
+ TEST_TYPE_TX_POLL,
+ TEST_TYPE_POLL_RXQ_TMOUT,
+ TEST_TYPE_POLL_TXQ_TMOUT,
TEST_TYPE_UNALIGNED,
TEST_TYPE_ALIGNED_INV_DESC,
TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
new file mode 100755
index 000000000000..0cf9e47e3209
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test VLAN classification after routing and verify that the order of
+# configuration does not impact switch behavior. Verify that {RIF, Port}->VID
+# mapping is added correctly for an existing {Port, VID}->FID mapping and that
+# {RIF, Port}->VID mapping is added correctly for a new {Port, VID}->FID mapping.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1 + + $swp2 | |
+# | | | | | |
+# | | $swp1.10 + + $swp2.10 | |
+# | | | |
+# | | br0 | |
+# | | 192.0.2.2/28 | |
+# | +--------------------------------------------------------+ |
+# | |
+# | $swp3.20 + |
+# | 192.0.2.17/28 | |
+# | | |
+# | $swp3 + |
+# +---------------|--------------------------------------------+
+# |
+# +---------------|--+
+# | $h3 + |
+# | | |
+# | $h3.20 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_vid_map_rif
+ rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ vlan_create $h3 20 v$h3 192.0.2.18/28
+
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+ vlan_destroy $h3 20
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ # By default, a link-local address is generated when the netdevice comes
+ # up. Adding an address to the bridge will cause a RIF to be created for
+ # it. Suppress link-local address generation so that we can control when
+ # the RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 10
+ ip link set dev $swp2.10 master br0
+
+ ip link set dev $swp3 up
+ vlan_create $swp3 20 "" 192.0.2.17/28
+
+ # Replace the neighbor entry so that the first packet is not forwarded in
+ # software due to an "unresolved neigh".
+ ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20)
+}
+
+switch_destroy()
+{
+ vlan_destroy $swp3 20
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2.10 nomaster
+ vlan_destroy $swp2 10
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br0 add 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ __addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+ RET=0
+
+ # First add {port, VID}->FID for $swp1.10, then add a RIF and verify that
+ # packets get the correct VID after routing.
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+ bridge_rif_add
+
+ # Replace the neighbor entry so that the first packet is not forwarded in
+ # software due to an "unresolved neigh".
+ ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing {port, VID}->FID mapping"
+
+ tc filter del dev $swp1 egress
+
+ bridge_rif_del
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+ RET=0
+
+ # First add an address to the bridge, which will create a RIF on top of
+ # it, then add a new {port, VID}->FID mapping and verify that packets
+ # get the correct VID after routing.
+ bridge_rif_add
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+
+ # Replace the neighbor entry so that the first packet is not forwarded in
+ # software due to an "unresolved neigh".
+ ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp1 egress
+
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+ bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
new file mode 100755
index 000000000000..df2b09966886
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over a bridge and verify that the order of configuration does
+# not impact switch behavior. Verify that the RIF is added correctly for
+# existing mappings and that new mappings use the correct RIF.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1 + + $swp2 | |
+# | | | | | |
+# | | $swp1.10 + + $swp2.10 | |
+# | | | |
+# | | br0 | |
+# | | 192.0.2.2/28 | |
+# | +--------------------------------------------------------+ |
+# | |
+# | $swp3.10 + |
+# | 192.0.2.17/28 | |
+# | | |
+# | $swp3 + |
+# +---------------|--------------------------------------------+
+# |
+# +---------------|--+
+# | $h3 + |
+# | | |
+# | $h3.10 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_vid_map_rif
+ rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ vlan_create $h3 10 v$h3 192.0.2.18/28
+
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+ vlan_destroy $h3 10
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ # By default, a link-local address is generated when the netdevice comes
+ # up. Adding an address to the bridge will cause a RIF to be created for
+ # it. Suppress link-local address generation so that we can control when
+ # the RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 10
+ ip link set dev $swp2.10 master br0
+
+ ip link set dev $swp3 up
+ vlan_create $swp3 10 "" 192.0.2.17/28
+ tc qdisc add dev $swp3 clsact
+
+ # Replace the neighbor entry so that the first packet is not forwarded in
+ # software due to an "unresolved neigh".
+ ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10)
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp3 clsact
+ vlan_destroy $swp3 10
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2.10 nomaster
+ vlan_destroy $swp2 10
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br0 add 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ __addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+ RET=0
+
+ # First add {port, VID}->FID for $swp1.10, then add a RIF and verify
+ # that packets can be routed via the existing mapping.
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+ bridge_rif_add
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing {port, VID}->FID mapping"
+
+ tc filter del dev $swp3 egress
+
+ bridge_rif_del
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+ RET=0
+
+ # First add an address to the bridge, which will create a RIF on top of
+ # it, then add a new {port, VID}->FID mapping and verify that packets
+ # can be routed via the new mapping.
+ bridge_rif_add
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp3 egress
+
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+ bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
new file mode 100755
index 000000000000..577293bab88b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over a bridge and verify that the order of configuration does
+# not impact switch behavior. Verify that the RIF is added correctly for an
+# existing mapping and that packets can be routed via a port which is added
+# after the FID already has a RIF.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1 + + $swp2 | |
+# | | | |
+# | | br0 | |
+# | +--------------------------------------------------------+ |
+# | | |
+# | br0.10 |
+# | 192.0.2.2/28 |
+# | |
+# | |
+# | $swp3 + |
+# | 192.0.2.17/28 | |
+# +----------------|-------------------------------------------+
+# |
+# +----------------|--+
+# | $h3 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +-------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ vid_map_rif
+ rif_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.18/28
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+ simple_if_fini $h3 192.0.2.18/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ # By default, a link-local address is generated when the netdevice comes
+ # up. Adding an address to the bridge will cause a RIF to be created for
+ # it. Suppress link-local address generation so that we can control when
+ # the RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ ip link set dev $swp2 master br0
+ bridge vlan add vid 10 dev $swp2
+
+ ip link set dev $swp3 up
+ __addr_add_del $swp3 add 192.0.2.17/28
+ tc qdisc add dev $swp3 clsact
+
+ # Replace the neighbor entry so that the first packet is not forwarded in
+ # software due to an "unresolved neigh".
+ ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3)
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp3 clsact
+ __addr_add_del $swp3 del 192.0.2.17/28
+ ip link set dev $swp3 down
+
+ bridge vlan del vid 10 dev $swp2
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ vlan_create br0 10 "" 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ vlan_destroy br0 10
+}
+
+vid_map_rif()
+{
+ RET=0
+
+ # First add VID->FID for vlan 10, then add a RIF and verify that
+ # packets can be routed via the existing mapping.
+ bridge vlan add vid 10 dev br0 self
+ ip link set dev $swp1 master br0
+ bridge vlan add vid 10 dev $swp1
+
+ bridge_rif_add
+
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing VID->FID mapping"
+
+ tc filter del dev $swp3 egress
+
+ bridge_rif_del
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 nomaster
+ bridge vlan del vid 10 dev br0 self
+}
+
+rif_vid_map()
+{
+ RET=0
+
+ # Using 802.1Q, there is only one VID->FID map for each VID. That means
+ # that we cannot really check adding a new map for existing FID with a
+ # RIF. Verify that packets can be routed via port which is added after
+ # the FID already has a RIF, although in practice there is no new
+ # mapping in the hardware.
+ bridge vlan add vid 10 dev br0 self
+ bridge_rif_add
+
+ ip link set dev $swp1 master br0
+ bridge vlan add vid 10 dev $swp1
+
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add port to VID->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp3 egress
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 nomaster
+
+ bridge_rif_del
+ bridge vlan del vid 10 dev br0 self
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
new file mode 100755
index 000000000000..90450216a10d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing after VXLAN decapsulation and verify that the order of
+# configuration does not impact switch behavior. Verify that the RIF is added
+# correctly for an existing mapping and that a new mapping uses the correct RIF.
+
+# +---------------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|----------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 br1 | |
+# | | vid 10 pvid untagged | |
+# | | | |
+# | | | |
+# | | + vx4001 | |
+# | | local 192.0.2.17 | |
+# | | remote 192.0.2.18 | |
+# | | id 104001 | |
+# | | dstport $VXPORT | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | +----------------------------------+------------------------------------+ |
+# | | |
+# | +----------------------------------|------------------------------------+ |
+# | | | | |
+# | | +-------------------------------+---------------------------------+ | |
+# | | | | | |
+# | | + vlan10 vlan4001 + | |
+# | | 192.0.2.2/28 | |
+# | | | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 198.51.100.1/24 192.0.2.17/32 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | v$rp2 |
+# | + $rp2 |
+# | 198.51.100.2/24 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ vni_fid_map_rif
+ rif_vni_fid_map
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VXLAN device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 198.51.100.1/24
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+
+ tc qdisc add dev $swp1 clsact
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 192.0.2.17 dstport $VXPORT \
+ nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+
+ ip address add 192.0.2.17/32 dev lo
+
+ # Create SVIs.
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+
+ # Replace the neighbor entry so that the first packet is not forwarded in
+ # software due to an "unresolved neigh".
+ ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1)
+
+ ip address add 192.0.2.2/28 dev vlan10
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+}
+
+switch_destroy()
+{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
+ bridge vlan del vid 4001 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 192.0.2.17/32 dev lo
+
+ tc qdisc del dev $swp1 clsact
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx4001 nomaster
+
+ ip link set dev vx4001 down
+ ip link del dev vx4001
+
+ ip address del dev $rp1 198.51.100.1/24
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 198.51.100.2/24
+
+ ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+}
+
+vrp2_destroy()
+{
+ ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+
+ simple_if_fini $rp2 198.51.100.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrp2_destroy
+
+ switch_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+payload_get()
+{
+ local dest_mac=$(mac_get vlan4001)
+ local src_mac=$(mac_get $rp1)
+
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"01:96:41:"$( : VXLAN VNI : 104001
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$src_mac:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"3f:49:"$( : IP identification
+ )"00:00:"$( : IP flags + frag off
+ )"3f:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"50:21:"$( : IP header csum
+ )"c6:33:64:0a:"$( : IP saddr: 198.51.100.10
+ )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )
+ echo $p
+}
+
+vlan_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+}
+
+vlan_rif_del()
+{
+ ip link del dev vlan4001
+}
+
+vni_fid_map_rif()
+{
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ # First add VNI->FID mapping to the FID of VLAN 4001
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ # Add a RIF to the FID with VNI->FID mapping
+ vlan_rif_add
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ payload=$(payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -A 192.0.2.18 \
+ -t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing VNI->FID mapping"
+
+ tc filter del dev $swp1 egress
+
+ bridge vlan del vid 4001 dev vx4001 pvid untagged
+ vlan_rif_del
+}
+
+rif_vni_fid_map()
+{
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ # First add a RIF to the FID of VLAN 4001
+ vlan_rif_add
+
+ # Add VNI->FID mapping to FID with a RIF
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ payload=$(payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -A 192.0.2.18 \
+ -t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add VNI->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp1 egress
+
+ bridge vlan del vid 4001 dev vx4001 pvid untagged
+ vlan_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 0e5751af6247..bec5cf96984c 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -39,4 +39,7 @@ toeplitz
tun
cmsg_sender
unix_connect
-tap
\ No newline at end of file
+tap
+bind_bhash
+sk_bind_sendto_listen
+sk_connect_zero_addr
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index c0ee2955fe54..f5ac1433c301 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -42,6 +42,8 @@ TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
TEST_PROGS += arp_ndisc_untracked_subnets.sh
TEST_PROGS += stress_reuseport_listen.sh
+TEST_PROGS += l2_tos_ttl_inherit.sh
+TEST_PROGS += bind_bhash.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
@@ -63,6 +65,9 @@ TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh
TEST_GEN_FILES += io_uring_zerocopy_tx
+TEST_GEN_FILES += bind_bhash
+TEST_GEN_PROGS += sk_bind_sendto_listen
+TEST_GEN_PROGS += sk_connect_zero_addr
TEST_FILES := settings
@@ -73,3 +78,4 @@ include bpf/Makefile
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
+$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
new file mode 100644
index 000000000000..57ff67a3751e
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This times how long it takes to bind to a port when the port already
+ * has multiple sockets in its bhash table.
+ *
+ * In setup(), we populate the port's bhash table with
+ * MAX_THREADS * MAX_CONNECTIONS entries.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define MAX_THREADS 600
+#define MAX_CONNECTIONS 40
+
+static const char *setup_addr_v6 = "::1";
+static const char *setup_addr_v4 = "127.0.0.1";
+static const char *setup_addr;
+static const char *bind_addr;
+static const char *port;
+bool use_v6;
+int ret;
+
+static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
+
+static int bind_socket(int opt, const char *addr)
+{
+ struct addrinfo *res, hint = {};
+ int sock_fd, reuse = 1, err;
+ int domain = use_v6 ? AF_INET6 : AF_INET;
+
+ sock_fd = socket(domain, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ perror("socket fd err");
+ return sock_fd;
+ }
+
+ hint.ai_family = domain;
+ hint.ai_socktype = SOCK_STREAM;
+
+ err = getaddrinfo(addr, port, &hint, &res);
+ if (err) {
+ perror("getaddrinfo failed");
+ goto cleanup;
+ }
+
+ if (opt) {
+ err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
+ if (err) {
+ perror("setsockopt failed");
+ goto cleanup;
+ }
+ }
+
+ err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
+ if (err) {
+ perror("failed to bind to port");
+ goto cleanup;
+ }
+
+ return sock_fd;
+
+cleanup:
+ close(sock_fd);
+ return err;
+}
+
+static void *setup(void *arg)
+{
+ int sock_fd, i;
+ int *array = (int *)arg;
+
+ for (i = 0; i < MAX_CONNECTIONS; i++) {
+ sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ if (sock_fd < 0) {
+ ret = sock_fd;
+ pthread_exit(&ret);
+ }
+ array[i] = sock_fd;
+ }
+
+ return NULL;
+}
+
+int main(int argc, const char *argv[])
+{
+ int listener_fd, sock_fd, i, j;
+ pthread_t tid[MAX_THREADS];
+ clock_t begin, end;
+
+ if (argc != 4) {
+ printf("Usage: bind_bhash <port> <ipv6 | ipv4> <bind-addr>\n");
+ return -1;
+ }
+
+ port = argv[1];
+ use_v6 = strcmp(argv[2], "ipv6") == 0;
+ bind_addr = argv[3];
+
+ setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
+
+ listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ if (listen(listener_fd, 100) < 0) {
+ perror("listen failed");
+ return -1;
+ }
+
+ /* Set up threads to populate the bhash table entry for the port */
+ for (i = 0; i < MAX_THREADS; i++)
+ pthread_create(&tid[i], NULL, setup, fd_array[i]);
+
+ for (i = 0; i < MAX_THREADS; i++)
+ pthread_join(tid[i], NULL);
+
+ if (ret)
+ goto done;
+
+ begin = clock();
+
+ /* Bind to the same port on a different address */
+ sock_fd = bind_socket(0, bind_addr);
+ if (sock_fd < 0)
+ goto done;
+
+ end = clock();
+
+ printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
+
+ /* clean up */
+ close(sock_fd);
+
+done:
+ close(listener_fd);
+ for (i = 0; i < MAX_THREADS; i++) {
+ for (j = 0; j < MAX_CONNECTIONS; j++)
+ close(fd_array[i][j]);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh
new file mode 100755
index 000000000000..ca0292d4b441
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NR_FILES=32768
+SAVED_NR_FILES=$(ulimit -n)
+
+# default values
+port=443
+addr_v6="2001:0db8:0:f101::1"
+addr_v4="10.8.8.8"
+use_v6=true
+addr=""
+
+usage() {
+ echo "Usage: $0 [-6 | -4] [-p port] [-a address]"
+ echo -e "\t6: use ipv6"
+ echo -e "\t4: use ipv4"
+ echo -e "\tport: Port number"
+ echo -e "\taddress: ip address"
+}
+
+while getopts "ha:p:64" opt; do
+ case ${opt} in
+ h)
+ usage $0
+ exit 0
+ ;;
+ a) addr=$OPTARG;;
+ p)
+ port=$OPTARG;;
+ 6)
+ use_v6=true;;
+ 4)
+ use_v6=false;;
+ esac
+done
+
+setup() {
+ if [[ "$use_v6" == true ]]; then
+ ip addr add $addr_v6 nodad dev eth0
+ else
+ ip addr add $addr_v4 dev lo
+ fi
+ ulimit -n $NR_FILES
+}
+
+cleanup() {
+ if [[ "$use_v6" == true ]]; then
+ ip addr del $addr_v6 dev eth0
+ else
+ ip addr del $addr_v4/32 dev lo
+ fi
+ ulimit -n $SAVED_NR_FILES
+}
+
+if [[ "$addr" != "" ]]; then
+ addr_v4=$addr;
+ addr_v6=$addr;
+fi
+setup
+if [[ "$use_v6" == true ]] ; then
+ ./bind_bhash $port "ipv6" $addr_v6
+else
+ ./bind_bhash $port "ipv4" $addr_v4
+fi
+cleanup
diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
new file mode 100755
index 000000000000..dca1e6f777a8
--- /dev/null
+++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
@@ -0,0 +1,390 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Matthias May <matthias.may@westermo.com>
+#
+# This script checks whether ip tunnels that are capable of carrying L2
+# traffic correctly inherit or set the inheritable fields.
+# These tunnels are: 'gretap', 'vxlan' and 'geneve'.
+# Checked inheritable fields are: TOS and TTL.
+# The outer tunnel protocol is verified for both 'IPv4' and 'IPv6'.
+# Payload frames of type 'IPv4', 'IPv6' and 'other' (ARP) are verified.
+# In addition, this script checks that forcing a specific field in the
+# outer header works as well.
+
+if [ "$(id -u)" != "0" ]; then
+ echo "Please run as root."
+ exit 0
+fi
+if ! which tcpdump > /dev/null 2>&1; then
+ echo "No tcpdump found. Required for this test."
+ exit 0
+fi
+
+expected_tos="0x00"
+expected_ttl="0"
+failed=false
+
+get_random_tos() {
+ # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4
+ echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\
+$(tr -dc '048c' < /dev/urandom | head -c 1)"
+}
+get_random_ttl() {
+ # Get a random dec value between 0 and 255
+ printf "%d" "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 2)"
+}
+get_field() {
+ # Expects to get the 'head -n 1' of a captured frame by tcpdump.
+ # Parses this first line and returns the specified field.
+ local field="$1"
+ local input="$2"
+ local found=false
+ input="$(echo "$input" | tr -d '(),')"
+ for input_field in $input; do
+ if $found; then
+ echo "$input_field"
+ return
+ fi
+ # The next field we iterate over is the value we are looking for
+ if [ "$input_field" = "$field" ]; then
+ found=true
+ fi
+ done
+ echo "0"
+}
+setup() {
+ local type="$1"
+ local outer="$2"
+ local inner="$3"
+ local tos_ttl="$4"
+ local vlan="$5"
+ local test_tos="0x00"
+ local test_ttl="0"
+ local ns="ip netns exec testing"
+
+ # We don't want a test-tos of 0x00,
+ # because this is the value that we get when no tos is set.
+ expected_tos="$(get_random_tos)"
+ while [ "$expected_tos" = "0x00" ]; do
+ expected_tos="$(get_random_tos)"
+ done
+ if [ "$tos_ttl" = "random" ]; then
+ test_tos="$expected_tos"
+ tos="fixed $test_tos"
+ elif [ "$tos_ttl" = "inherit" ]; then
+ test_tos="$tos_ttl"
+ tos="inherit $expected_tos"
+ fi
+
+ # We don't want a test-ttl of 64 or 0, because 64 is the value used
+ # when no ttl is set and 0 is not a valid ttl.
+ expected_ttl="$(get_random_ttl)"
+ while [ "$expected_ttl" = "64" ] || [ "$expected_ttl" = "0" ]; do
+ expected_ttl="$(get_random_ttl)"
+ done
+
+ if [ "$tos_ttl" = "random" ]; then
+ test_ttl="$expected_ttl"
+ ttl="fixed $test_ttl"
+ elif [ "$tos_ttl" = "inherit" ]; then
+ test_ttl="$tos_ttl"
+ ttl="inherit $expected_ttl"
+ fi
+ printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \
+ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan"
+
+ # Create 'testing' netns, veth pair and connect main ns with testing ns
+ ip netns add testing
+ ip link add type veth
+ ip link set veth1 netns testing
+ ip link set veth0 up
+ $ns ip link set veth1 up
+ ip addr flush dev veth0
+ $ns ip addr flush dev veth1
+
+ local local_addr1=""
+ local local_addr2=""
+ if [ "$type" = "gre" ] || [ "$type" = "vxlan" ]; then
+ if [ "$outer" = "4" ]; then
+ local_addr1="local 198.18.0.1"
+ local_addr2="local 198.18.0.2"
+ elif [ "$outer" = "6" ]; then
+ local_addr1="local fdd1:ced0:5d88:3fce::1"
+ local_addr2="local fdd1:ced0:5d88:3fce::2"
+ fi
+ fi
+ local vxlan=""
+ if [ "$type" = "vxlan" ]; then
+ vxlan="vni 100 dstport 4789"
+ fi
+ local geneve=""
+ if [ "$type" = "geneve" ]; then
+ geneve="vni 100"
+ fi
+ # Create tunnel and assign outer IPv4/IPv6 addresses
+ if [ "$outer" = "4" ]; then
+ if [ "$type" = "gre" ]; then
+ type="gretap"
+ fi
+ ip addr add 198.18.0.1/24 dev veth0
+ $ns ip addr add 198.18.0.2/24 dev veth1
+ ip link add name tep0 type $type $local_addr1 remote \
+ 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve
+ $ns ip link add name tep1 type $type $local_addr2 remote \
+ 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve
+ elif [ "$outer" = "6" ]; then
+ if [ "$type" = "gre" ]; then
+ type="ip6gretap"
+ fi
+ ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0
+ $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1
+ ip link add name tep0 type $type $local_addr1 \
+ remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ $ns ip link add name tep1 type $type $local_addr2 \
+ remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ fi
+
+ # Bring L2-tunnel link up and create VLAN on top
+ ip link set tep0 up
+ $ns ip link set tep1 up
+ ip addr flush dev tep0
+ $ns ip addr flush dev tep1
+ local parent
+ if $vlan; then
+ parent="vlan99-"
+ ip link add link tep0 name ${parent}0 type vlan id 99
+ $ns ip link add link tep1 name ${parent}1 type vlan id 99
+ ip link set ${parent}0 up
+ $ns ip link set ${parent}1 up
+ ip addr flush dev ${parent}0
+ $ns ip addr flush dev ${parent}1
+ else
+ parent="tep"
+ fi
+
+ # Assign inner IPv4/IPv6 addresses
+ if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then
+ ip addr add 198.19.0.1/24 brd + dev ${parent}0
+ $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1
+ elif [ "$inner" = "6" ]; then
+ ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0
+ $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1
+ fi
+}
+
+verify() {
+ local outer="$1"
+ local inner="$2"
+ local tos_ttl="$3"
+ local vlan="$4"
+
+ local ping_pid out captured_tos captured_ttl result
+
+ local ping_dst
+ if [ "$inner" = "4" ]; then
+ ping_dst="198.19.0.2"
+ elif [ "$inner" = "6" ]; then
+ ping_dst="fdd4:96cf:4eae:443b::2"
+ elif [ "$inner" = "other" ]; then
+ ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \
+ 2>/dev/null 1>&2 & ping_pid="$!"
+ else
+ ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
+ fi
+ local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset
+ if [ "$type" = "gre" ]; then
+ tunnel_type_proto="0x2f"
+ elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ tunnel_type_proto="0x11"
+ fi
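+ # The byte offsets below are relative to the start of the outer IP/IPv6
+ # header and are adjusted for the extra encapsulation added by
+ # VXLAN/GENEVE and for an optional VLAN tag.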
+ if [ "$outer" = "4" ]; then
+ tunnel_type_offset="9"
+ if [ "$inner" = "4" ]; then
+ req_proto_offset="47"
+ req_offset="58"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x01 and \
+ ip[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "6" ]; then
+ req_proto_offset="44"
+ req_offset="78"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x3a and \
+ ip[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "other" ]; then
+ req_proto_offset="36"
+ req_offset="45"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ expected_tos="0x00"
+ expected_ttl="64"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x08 and \
+ ip[$((req_proto_offset + 1))] = 0x06 and \
+ ip[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+ fi
+ elif [ "$outer" = "6" ]; then
+ if [ "$type" = "gre" ]; then
+ tunnel_type_offset="40"
+ elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ tunnel_type_offset="6"
+ fi
+ if [ "$inner" = "4" ]; then
+ local req_proto_offset="75"
+ local req_offset="86"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x01 and \
+ ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "6" ]; then
+ local req_proto_offset="72"
+ local req_offset="106"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x3a and \
+ ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+ elif [ "$inner" = "other" ]; then
+ local req_proto_offset="64"
+ local req_offset="73"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ expected_tos="0x00"
+ expected_ttl="64"
+ fi
+ out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x08 and \
+ ip6[$((req_proto_offset + 1))] = 0x06 and \
+ ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+ fi
+ fi
+ kill -9 $ping_pid
+ wait $ping_pid 2>/dev/null
+ result="FAIL"
+ if [ "$outer" = "4" ]; then
+ captured_ttl="$(get_field "ttl" "$out")"
+ captured_tos="$(printf "0x%02x" "$(get_field "tos" "$out")")"
+ if [ "$captured_tos" = "$expected_tos" ] &&
+ [ "$captured_ttl" = "$expected_ttl" ]; then
+ result="OK"
+ fi
+ elif [ "$outer" = "6" ]; then
+ captured_ttl="$(get_field "hlim" "$out")"
+ captured_tos="$(printf "0x%02x" "$(get_field "class" "$out")")"
+ if [ "$captured_tos" = "$expected_tos" ] &&
+ [ "$captured_ttl" = "$expected_ttl" ]; then
+ result="OK"
+ fi
+ fi
+
+ printf "%7s │\n" "$result"
+ if [ "$result" = "FAIL" ]; then
+ failed=true
+ if [ "$captured_tos" != "$expected_tos" ]; then
+ printf "│%43s%27s │\n" \
+ "Expected TOS value: $expected_tos" \
+ "Captured TOS value: $captured_tos"
+ fi
+ if [ "$captured_ttl" != "$expected_ttl" ]; then
+ printf "│%43s%27s │\n" \
+ "Expected TTL value: $expected_ttl" \
+ "Captured TTL value: $captured_ttl"
+ fi
+ printf "│%71s│\n" " "
+ fi
+}
+
+cleanup() {
+ ip link del veth0 2>/dev/null
+ ip netns del testing 2>/dev/null
+ ip link del tep0 2>/dev/null
+}
+
+printf "┌────────┬───────┬───────┬──────────────┬"
+printf "──────────────┬───────┬────────┐\n"
+for type in gre vxlan geneve; do
+ if ! modprobe "$type" 2>/dev/null; then
+ continue
+ fi
+ for outer in 4 6; do
+ printf "├────────┼───────┼───────┼──────────────┼"
+ printf "──────────────┼───────┼────────┤\n"
+ printf "│ Type │ outer │ inner │ tos │"
+ printf " ttl │ vlan │ result │\n"
+ for inner in 4 6 other; do
+ printf "├────────┼───────┼───────┼──────────────┼"
+ printf "──────────────┼───────┼────────┤\n"
+ for tos_ttl in inherit random; do
+ for vlan in false true; do
+ setup "$type" "$outer" "$inner" \
+ "$tos_ttl" "$vlan"
+ verify "$outer" "$inner" "$tos_ttl" \
+ "$vlan"
+ cleanup
+ done
+ done
+ done
+ done
+done
+printf "└────────┴───────┴───────┴──────────────┴"
+printf "──────────────┴───────┴────────┘\n"
+
+if $failed; then
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/sk_bind_sendto_listen.c b/tools/testing/selftests/net/sk_bind_sendto_listen.c
new file mode 100644
index 000000000000..b420d830f72c
--- /dev/null
+++ b/tools/testing/selftests/net/sk_bind_sendto_listen.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
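+/*
+ * Bind two IPv6 sockets to the same wildcard address and port with
+ * SO_REUSEADDR, issue a zero-length MSG_FASTOPEN sendto() on each (the
+ * second one is expected to fail), and verify that the second socket can
+ * still enter the listening state.
+ */
+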
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd1, fd2, one = 1;
+ struct sockaddr_in6 bind_addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(20000),
+ .sin6_flowinfo = htonl(0),
+ .sin6_addr = {},
+ .sin6_scope_id = 0,
+ };
+
+ inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+ fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd1 < 0) {
+ error(1, errno, "socket fd1");
+ return -1;
+ }
+
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+ error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+ goto out_err1;
+ }
+
+ if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd1");
+ goto out_err1;
+ }
+
+ if (sendto(fd1, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+ sizeof(bind_addr))) {
+ error(1, errno, "sendto fd1");
+ goto out_err1;
+ }
+
+ fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd2 < 0) {
+ error(1, errno, "socket fd2");
+ goto out_err1;
+ }
+
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+ error(1, errno, "setsockopt(SO_REUSEADDR) fd2");
+ goto out_err2;
+ }
+
+ if (bind(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd2");
+ goto out_err2;
+ }
+
+ if (sendto(fd2, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+ sizeof(bind_addr)) != -1) {
+ error(1, errno, "sendto fd2");
+ goto out_err2;
+ }
+
+ if (listen(fd2, 0)) {
+ error(1, errno, "listen");
+ goto out_err2;
+ }
+
+ close(fd2);
+ close(fd1);
+ return 0;
+
+out_err2:
+ close(fd2);
+
+out_err1:
+ close(fd1);
+ return -1;
+}
diff --git a/tools/testing/selftests/net/sk_connect_zero_addr.c b/tools/testing/selftests/net/sk_connect_zero_addr.c
new file mode 100644
index 000000000000..4be418aefd9f
--- /dev/null
+++ b/tools/testing/selftests/net/sk_connect_zero_addr.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
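+/*
+ * Listen on the IPv6 wildcard address, then connect a second socket to the
+ * same all-zero address and port and verify that the connect succeeds.
+ */
+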
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd1, fd2, one = 1;
+ struct sockaddr_in6 bind_addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(20000),
+ .sin6_flowinfo = htonl(0),
+ .sin6_addr = {},
+ .sin6_scope_id = 0,
+ };
+
+ inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+ fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd1 < 0) {
+ error(1, errno, "socket fd1");
+ return -1;
+ }
+
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+ error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+ goto out_err1;
+ }
+
+ if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd1");
+ goto out_err1;
+ }
+
+ if (listen(fd1, 0)) {
+ error(1, errno, "listen");
+ goto out_err1;
+ }
+
+ fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd2 < 0) {
+ error(1, errno, "socket fd2");
+ goto out_err1;
+ }
+
+ if (connect(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd2");
+ goto out_err2;
+ }
+
+ close(fd2);
+ close(fd1);
+ return 0;
+
+out_err2:
+ close(fd2);
+out_err1:
+ close(fd1);
+ return -1;
+}
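The behaviour sk_connect_zero_addr.c relies on is that Linux treats connect() to the unspecified address ("::" or 0.0.0.0) as a connection to the local host, so fd2 reaches the listener bound on fd1 over loopback. A hedged, standalone sketch (not part of the patch; the port matches the test only for illustration) that makes this visible by printing the peer address after such a connect:

/* Illustrative sketch only: connect() to "::" ends up with a loopback peer.
 * Assumes something is already listening on local port 20000.
 */
#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in6 dst = {
		.sin6_family = AF_INET6,
		.sin6_port = htons(20000),
	};
	struct sockaddr_in6 peer;
	socklen_t peer_len = sizeof(peer);
	char addr_str[INET6_ADDRSTRLEN];
	int fd;

	/* dst.sin6_addr is left zeroed, i.e. the unspecified address "::". */
	fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst))) {
		perror("connect");
		close(fd);
		return 1;
	}

	if (getpeername(fd, (struct sockaddr *)&peer, &peer_len)) {
		perror("getpeername");
		close(fd);
		return 1;
	}

	/* Expected to print ::1 on Linux. */
	printf("connected to %s\n",
	       inet_ntop(AF_INET6, &peer.sin6_addr, addr_str, sizeof(addr_str)));

	close(fd);
	return 0;
}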
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dc577461afc2..bb6d691cb30d 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -18,6 +18,7 @@
#include <sys/socket.h>
#include <time.h>
#include <sys/mman.h>
+#include <poll.h>
#include "timeout.h"
#include "control.h"
@@ -596,6 +597,108 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt
close(fd);
}
+#define RCVLOWAT_BUF_SIZE 128
+
+static void test_stream_poll_rcvlowat_server(const struct test_opts *opts)
+{
+ int fd;
+ int i;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Send 1 byte. */
+ send_byte(fd, 1, 0);
+
+ control_writeln("SRVSENT");
+
+ /* Wait until client is ready to receive rest of data. */
+ control_expectln("CLNSENT");
+
+ for (i = 0; i < RCVLOWAT_BUF_SIZE - 1; i++)
+ send_byte(fd, 1, 0);
+
+ /* Keep the connection open until the client has finished polling and reading. */
+ control_expectln("POLLDONE");
+
+ close(fd);
+}
+
+static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
+{
+ int lowat_val = RCVLOWAT_BUF_SIZE;
+ char buf[RCVLOWAT_BUF_SIZE];
+ struct pollfd fds;
+ ssize_t read_res;
+ short poll_flags;
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
+ &lowat_val, sizeof(lowat_val))) {
+ perror("setsockopt");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SRVSENT");
+
+ /* At this point, the server has sent only 1 byte. */
+ fds.fd = fd;
+ poll_flags = POLLIN | POLLRDNORM;
+ fds.events = poll_flags;
+
+ /* Wait up to 1 second. */
+ if (poll(&fds, 1, 1000) < 0) {
+ perror("poll");
+ exit(EXIT_FAILURE);
+ }
+
+ /* poll() must report no events: 1 byte is below SO_RCVLOWAT. */
+ if (fds.revents) {
+ fprintf(stderr, "Unexpected poll result %hx\n",
+ fds.revents);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Tell server to send rest of data. */
+ control_writeln("CLNSENT");
+
+ /* Poll for data. */
+ if (poll(&fds, 1, 10000) < 0) {
+ perror("poll");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Only these two bits are expected. */
+ if (fds.revents != poll_flags) {
+ fprintf(stderr, "Unexpected poll result %hx\n",
+ fds.revents);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Use MSG_DONTWAIT: if the call would block, it returns
+ * EAGAIN instead of waiting.
+ */
+ read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+ if (read_res != RCVLOWAT_BUF_SIZE) {
+ fprintf(stderr, "Unexpected recv result %zi\n",
+ read_res);
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("POLLDONE");
+
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -646,6 +749,11 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_invalid_rec_buffer_client,
.run_server = test_seqpacket_invalid_rec_buffer_server,
},
+ {
+ .name = "SOCK_STREAM poll() + SO_RCVLOWAT",
+ .run_client = test_stream_poll_rcvlowat_client,
+ .run_server = test_stream_poll_rcvlowat_server,
+ },
{},
};
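For reference, the SO_RCVLOWAT semantics the vsock test above exercises match those of an ordinary TCP stream socket: poll() does not report POLLIN until at least the low-watermark number of bytes is queued, after which a non-blocking recv() can return the whole amount at once. A hedged sketch against a plain loopback TCP connection follows (not part of the patch; the port and timeouts are arbitrary and error handling is omitted for brevity):

/* Illustrative sketch only: SO_RCVLOWAT vs. poll() on a plain TCP socket.
 * Error handling is omitted for brevity.
 */
#include <arpa/inet.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(20001),	/* arbitrary port */
	};
	int lowat = 128, one = 1;
	int lfd, sfd, cfd;
	struct pollfd pfd;
	char buf[128];

	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

	lfd = socket(AF_INET, SOCK_STREAM, 0);
	setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
	listen(lfd, 1);

	cfd = socket(AF_INET, SOCK_STREAM, 0);
	connect(cfd, (struct sockaddr *)&addr, sizeof(addr));
	sfd = accept(lfd, NULL, NULL);

	/* Do not report readability on cfd below 128 queued bytes. */
	setsockopt(cfd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));

	/* 1 byte is below the watermark: poll() should time out (prints 0). */
	send(sfd, "x", 1, 0);
	pfd.fd = cfd;
	pfd.events = POLLIN;
	printf("poll after 1 byte: %d\n", poll(&pfd, 1, 100));

	/* Topping the queue up to 128 bytes satisfies the watermark (prints 1). */
	memset(buf, 'x', sizeof(buf));
	send(sfd, buf, 127, 0);
	printf("poll after 128 bytes: %d\n", poll(&pfd, 1, 100));

	close(cfd);
	close(sfd);
	close(lfd);
	return 0;
}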